本文最后更新于 158 天前,其中的信息可能已经有所发展或是发生改变。
报错:
HTTPSConnectionPool(host='www.tvsou.com', port=443): Max retries exceeded with url: /epg/XIAMENTV-1/w1 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:992)')))
解决办法:
忽略证书验证(不推荐在生产环境中使用,仅作为没有其他办法时的临时方案):
如果你确定连接的目标是安全的,可以在请求中添加 verify=False 参数来忽略证书验证。
方法1:修改epg中的tvsou spider为以下代码:
import datetime

import requests
from bs4 import BeautifulSoup as bs
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Every request below is sent with verify=False, which makes urllib3 emit an
# InsecureRequestWarning per call; silence it so the output stays readable.
# NOTE: verify=False disables TLS certificate validation entirely. It is a
# workaround for the "unable to get local issuer certificate" error and
# should not be used where the connection must be trusted.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Request headers sent with every call to tvsou.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.0.1 Safari/537.36 Mxdepg/1.0.0'
}

# Seconds to wait for tvsou before giving up; shared by all requests so that
# no call can block forever.
REQUEST_TIMEOUT = 5


def _build_result(success, epgs, msg, dt):
    """Assemble the result dict returned by get_epgs_tvsou."""
    return {
        'success': success,
        'epgs': epgs,
        'msg': msg,
        'last_program_date': dt,
        'ban': 0,
    }


def get_desc_tvsou(url):
    """Fetch a programme detail page and return its description text.

    Args:
        url: Absolute URL of the programme detail page.

    Returns:
        The text of the first ``div.prog_content_txt`` element with all
        whitespace and the '剧情简介:' label removed, or '' when the page
        cannot be fetched or has no such element (best effort: a missing
        description must not abort the schedule scrape).
    """
    try:
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT, verify=False)
        res.encoding = 'utf-8'
        soup = bs(res.text, 'html.parser')
        text = soup.select('div.prog_content_txt')[0].text
    except Exception:
        return ''
    # The original chain removed '\n', ' ' and '\t' and then repeated the
    # ' ' removal a second time (a no-op). Splitting on all whitespace covers
    # those characters plus non-breaking / full-width spaces.
    return ''.join(text.split()).replace('剧情简介:', '')


def get_epgs_tvsou(channel, channel_id_, dt, func_arg):
    """Scrape one day's programme schedule for a channel from tvsou.

    Args:
        channel: Mapping describing the channel; only ``channel['id']`` is
            read and copied into every programme entry.
        channel_id_: tvsou channel id used in the URL. Anything from the
            first '#' onwards is ignored.
        dt: ``datetime.date`` of the schedule to fetch; its weekday selects
            the page (Monday -> w1 ... Sunday -> w7).
        func_arg: Unused by this function; kept so the signature stays
            unchanged for existing callers.

    Returns:
        A dict with keys ``success`` (1 or 0), ``epgs`` (list of programme
        dicts), ``msg`` (last error message, '' if none),
        ``last_program_date`` (``dt``) and ``ban`` (always 0). ``success`` is
        0 only when the schedule page itself cannot be fetched or parsed;
        rows that fail to parse are skipped and reported through ``msg``.
    """
    epgs = []
    msg = ''
    need_weekday = dt.weekday() + 1
    # split('#', 1)[0] also copes with ids that contain no '#' or several,
    # where unpacking a plain split('#') into two names would raise.
    channel_id = channel_id_.split('#', 1)[0]
    url = 'https://www.tvsou.com/epg/%s/w%s' % (channel_id, need_weekday)
    try:
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT, verify=False)
        res.encoding = 'utf-8'
        soup = bs(res.text, 'html.parser')
        rows = soup.select('table.layui-table')[0].select('tr')
    except Exception as e:
        return _build_result(0, epgs, 'spider-tvsou-连接失败,%s' % e, dt)

    # Cache of detail-page URL -> description, so a programme that appears
    # several times in the schedule is only fetched once.
    program_urls = {}
    for row in rows:
        try:
            links = row.select('td > a')
            # Rows with links carry time and title in the <a> elements;
            # plain rows carry them directly in the <td> cells.
            cells = links if links else row.select('td')
            # Strip first: with surrounding whitespace the [:2] / [-2:]
            # slices would pick up a blank and int() would silently parse
            # the wrong hour or minute.
            starttime_str = cells[0].text.strip()
            title = cells[1].text
            starttime = datetime.datetime.combine(
                dt,
                datetime.time(int(starttime_str[:2]), int(starttime_str[-2:])),
            )

            desc = ''
            if links and 'href' in links[0].attrs:
                # The scheme is prepended to the href as-is, which assumes
                # a protocol-relative link ("//host/path").
                program_url = ('https:' + links[0].attrs['href']).replace(' ', '')
                if program_url in program_urls:
                    desc = program_urls[program_url]
                elif len(program_url) > 17:
                    # URLs of 17 characters or fewer get no description
                    # (threshold kept from the original; presumably it
                    # filters out empty/placeholder links - TODO confirm).
                    desc = get_desc_tvsou(program_url)
                    program_urls[program_url] = desc

            epgs.append({
                'channel_id': channel['id'],
                'starttime': starttime,
                'endtime': None,
                'title': title,
                'desc': desc,
                'program_date': dt,
            })
        except Exception as e:
            # Header or malformed rows are skipped; remember the last error.
            msg = 'spider-tvsou-FOR:%s' % e
            continue
    return _build_result(1, epgs, msg, dt)


def get_channels_tvsou():
    """Scrape the tvsou channel directory page (``/epg/difang/``).

    Returns:
        A list of channel dicts with keys ``name``, ``id`` (single-element
        list), ``url``, ``source`` ('tvsou'), ``logo``, ``desc`` and ``sort``.
        On any failure the error is printed and the channels collected so
        far (possibly none) are returned.
    """
    channels = []
    host = 'https://www.tvsou.com'
    url = '%s/%s' % (host, 'epg/difang/')
    try:
        # Use the same timeout as the other requests so a stalled
        # connection cannot hang the whole run.
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT, verify=False)
        res.encoding = 'utf-8'
        soup = bs(res.text, 'html.parser')
        div_sorts = soup.select('div.pd_list > div.pd_tit')
        div_channels = soup.select('div.pd_list > div.pd_con > ul')
        # Category titles and channel lists are matched up by position.
        for index, div_channel in enumerate(div_channels):
            sort_name = div_sorts[index].text.strip()
            for li in div_channel.select('li'):
                href = li.a['href']
                channel_id = href.replace('epg', '').replace('/', '')
                # Ids shorter than 5 characters are skipped, as in the
                # original (presumably not real channel pages).
                if len(channel_id) < 5:
                    continue
                channels.append({
                    'name': li.a.text.strip(),
                    'id': [channel_id],
                    'url': host + href,
                    'source': 'tvsou',
                    'logo': '',
                    'desc': '',
                    'sort': sort_name,
                })
        print('共有:%s 个分类,%s 个频道' % (len(div_channels), len(channels)))
    except Exception as e:
        print(f"获取频道信息失败:{e}")
    return channels


def main():
    """Fetch today's schedule for every listed channel and report the outcome."""
    dt = datetime.date.today()
    for channel in get_channels_tvsou():
        epg_data = get_epgs_tvsou(channel, channel['id'][0], dt, None)
        if epg_data['success']:
            print(f"成功获取频道 {channel['name']} 的节目单")
        else:
            print(f"获取频道 {channel['name']} 的节目单失败:{epg_data['msg']}")


if __name__ == "__main__":
    main()