**有需要交流的请联系 QQ:2544100193**
代码
from urllib import request
import re
# Page to scrape and the browser identity presented to the server.
URL = 'https://www.qiushibaike.com/8hr/page/3/?s=4998909'
USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12) '
              'AppleWebKit/602.1.50 (KHTML, like Gecko) '
              'Version/10.0 Safari/602.1.50')

# re.S lets "." match newlines, so a <span> whose content is spread over
# several lines is captured instead of being silently skipped.
SPAN_RE = re.compile(r'<span>.*?</span>', re.S)


def fetch_html(url=URL, user_agent=USER_AGENT, timeout=10):
    """Download *url* and return its body decoded as UTF-8 text.

    The response status line and every response header are printed to
    stdout before the body is read.

    Args:
        url: Address to request.
        user_agent: Value sent in the ``User-Agent`` request header.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails (propagated from
            ``urlopen``).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = request.Request(url)
    req.add_header('User-Agent', user_agent)
    with request.urlopen(req, timeout=timeout) as resp:
        print('Status:', resp.status, resp.reason)
        for key, value in resp.getheaders():
            print('%s:%s' % (key, value))
        return resp.read().decode('utf-8')


def extract_spans(html):
    """Return every ``<span>...</span>`` fragment of *html* without "img".

    Fragments are returned with their surrounding tags, in document order.
    Any fragment containing the substring ``img`` is dropped.

    Args:
        html: Page source as text.

    Returns:
        A list of matching fragments; empty when nothing matches.
    """
    return [item for item in SPAN_RE.findall(html) if 'img' not in item]


def main():
    """Fetch the configured page and print each image-free span fragment."""
    for item in extract_spans(fetch_html()):
        print(item)


if __name__ == '__main__':
    main()