mxd's EPG 报错 [SSL: CERTIFICATE_VERIFY_FAILED]
报错:
HTTPSConnectionPool(host='www.tvsou.com', port=443): Max retries exceeded with url: /epg/XIAMENTV-1/w1 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:992)')))
解决办法:
忽略证书验证(不推荐在生产环境中使用,但目前没有其他办法):
如果你确定连接的目标是安全的,可以在请求中添加 verify=False 参数来忽略证书验证。
方法1:将 EPG 中的 tvsou spider 修改为以下代码:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from bs4 import BeautifulSoup as bs
import datetime
# Silence urllib3's InsecureRequestWarning; the requests in this module are
# made with verify=False, which is what triggers that warning category.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Request headers passed to the requests.get() calls in this module.
# The key must be spelled "User-Agent": with any other spelling the value is
# sent as an unrelated custom header and the HTTP client's default user agent
# is used instead of this browser-like string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.0.1 Safari/537.36 CrestekkEPG/3.1.0'
}
def get_desc_tvsou(url):
    """Fetch a programme detail page and return its cleaned-up synopsis.

    The text of the first ``div.prog_content_txt`` element is taken and the
    newline, space and tab characters plus the '剧情简介:' label are removed.

    Args:
        url: Address of the programme detail page.

    Returns:
        The cleaned synopsis, or ``''`` if anything fails (request error,
        missing element, ...). This is a best-effort lookup and never raises.
    """
    try:
        # Certificate verification is disabled on purpose (verify=False).
        response = requests.get(url, headers=headers, timeout=5, verify=False)
        response.encoding = 'utf-8'
        page = bs(response.text, 'html.parser')
        text = page.select('div.prog_content_txt')[0].text
        # Removal order is significant: whitespace goes first, then the label.
        for token in ('\n', ' ', '\t', '剧情简介:', ' '):
            text = text.replace(token, '')
    except Exception:
        return ''
    return text
def _tvsou_result(success, epgs, msg, dt):
    """Build the result dict returned by every exit path of get_epgs_tvsou."""
    return {
        'success': success,
        'epgs': epgs,
        'msg': msg,
        'last_program_date': dt,
        'ban': 0,
    }


def get_epgs_tvsou(channel, channel_id_, dt, func_arg):
    """Scrape the programme listing of one channel for one day from tvsou.com.

    Args:
        channel: Mapping describing the channel; only ``channel['id']`` is
            read and copied into every programme entry.
        channel_id_: tvsou channel id, optionally followed by ``#`` and extra
            text. Only the part before the first ``#`` is used in the URL.
        dt: Date of the wanted listing. ``dt.weekday() + 1`` selects the
            weekly page (``w1`` .. ``w7``) and ``dt`` is combined with each
            programme's start time.
        func_arg: Not used by this spider.

    Returns:
        A dict with the keys ``success`` (1, or 0 when the listing page could
        not be fetched or parsed), ``epgs`` (list of programme dicts with
        ``channel_id``, ``starttime``, ``endtime``, ``title``, ``desc`` and
        ``program_date``), ``msg`` (last error text, ``''`` if none),
        ``last_program_date`` (``dt``) and ``ban`` (always 0).
    """
    epgs = []
    msg = ''
    need_weekday = dt.weekday() + 1
    # partition() never raises, unlike tuple-unpacking split('#'), which blew
    # up outside any try block when the id contained more than one '#'.
    channel_id = channel_id_.partition('#')[0]
    url = 'https://www.tvsou.com/epg/%s/w%s' % (channel_id, need_weekday)
    try:
        # Certificate verification is disabled on purpose (verify=False).
        res = requests.get(url, headers=headers, timeout=5, verify=False)
        res.encoding = 'utf-8'
        soup = bs(res.text, 'html.parser')
        rows = soup.select('table.layui-table')[0].select('tr')
    except Exception as e:
        msg = 'spider-tvsou-连接失败,%s' % e
        return _tvsou_result(0, epgs, msg, dt)

    # Cache of detail-page URL -> description so a programme that appears
    # several times in the listing is only fetched once.
    program_urls = {}
    for row in rows:
        try:
            links = row.select('td > a')
            # Rows either wrap time/title in links or hold them as plain cells.
            cells = links if links else row.select('td')
            # Strip first: the hour/minute are taken by slicing, so stray
            # surrounding whitespace would shift the digits.
            starttime_str = cells[0].text.strip()
            title = cells[1].text
            starttime = datetime.datetime.combine(
                dt,
                datetime.time(int(starttime_str[:2]), int(starttime_str[-2:])),
            )
            desc = ''
            if links and 'href' in links[0].attrs:
                program_url = ('https:' + links[0].attrs['href']).replace(' ', '')
                if program_url in program_urls:
                    desc = program_urls[program_url]
                elif len(program_url) > 17:
                    # NOTE(review): 17 presumably filters out hrefs too short
                    # to point at a real detail page - confirm.
                    desc = get_desc_tvsou(program_url)
                    program_urls[program_url] = desc
            epgs.append({
                'channel_id': channel['id'],
                'starttime': starttime,
                'endtime': None,
                'title': title,
                'desc': desc,
                'program_date': dt,
            })
        except Exception as e:
            # A malformed row is skipped; only the last such error is kept in
            # msg and the overall result still counts as a success.
            msg = 'spider-tvsou-FOR:%s' % e
    return _tvsou_result(1, epgs, msg, dt)
def get_channels_tvsou():
    """Scrape the tvsou.com 'epg/difang/' page and return its channels.

    Each ``div.pd_list > div.pd_con > ul`` list is paired by position with
    the ``div.pd_list > div.pd_tit`` heading that names its category.

    Returns:
        A list of channel dicts with the keys ``name``, ``id`` (a one-element
        list), ``url``, ``source`` (always ``'tvsou'``), ``logo``, ``desc``
        and ``sort`` (the category heading). On any error a message is
        printed and the channels collected so far (possibly none) are
        returned; the function never raises.
    """
    channels = []
    host = 'https://www.tvsou.com'
    url = '%s/%s' % (host, 'epg/difang/')
    try:
        # timeout matches the other requests in this module; without it a
        # stalled connection would block forever. Certificate verification is
        # disabled on purpose (verify=False).
        res = requests.get(url, headers=headers, timeout=5, verify=False)
        res.encoding = 'utf-8'
        soup = bs(res.text, 'html.parser')
        div_sorts = soup.select('div.pd_list > div.pd_tit')
        div_channels = soup.select('div.pd_list > div.pd_con > ul')
        for n, div_channel in enumerate(div_channels):
            sort_name = div_sorts[n].text.strip()
            for li in div_channel.select('li'):
                link = li.a
                name = link.text.strip()
                href = link['href']
                channel_id = href.replace('epg', '').replace('/', '')
                # NOTE(review): ids shorter than 5 characters are skipped,
                # presumably because they are not real channel pages - confirm.
                if len(channel_id) < 5:
                    continue
                channels.append({
                    'name': name,
                    'id': [channel_id],
                    'url': host + href,
                    'source': 'tvsou',
                    'logo': '',
                    'desc': '',
                    'sort': sort_name,
                })
        print('共有:%s 个分类,%s 个频道' % (len(div_channels), len(channels)))
    except Exception as e:
        print(f"获取频道信息失败:{e}")
    return channels
if __name__ == "__main__":
    # Manual smoke test: list every channel, fetch today's listing for each
    # one and report whether the fetch succeeded.
    channels = get_channels_tvsou()
    for channel in channels or ():
        dt = datetime.date.today()
        epg_data = get_epgs_tvsou(channel, channel['id'][0], dt, None)
        if not epg_data['success']:
            print(f"获取频道 {channel['name']} 的节目单失败:{epg_data['msg']}")
            continue
        print(f"成功获取频道 {channel['name']} 的节目单")
mxd's EPG 报错 [SSL: CERTIFICATE_VERIFY_FAILED]
https://blog.mxdyeah.top/mxdyeah_blog_post/55.html