# 2018-10-23 09:55:46 -- addendum
import requests
# Fetch the page and decode it with the encoding detected from its content.
# requests requires an explicit scheme: a bare "www.baidu.com" raises
# requests.exceptions.MissingSchema before any request is sent.
response = requests.get("https://www.baidu.com", timeout=10)
# Encoding guessed from the response body (e.g. GB2312, or something else).
coding = response.apparent_encoding
# Alternative: take the charset declared in the HTML <meta> tag instead.
# coding = requests.utils.get_encodings_from_content(response.text)[0]
response.encoding = coding
print(response.text)
# 2018-10-10 10:33:15 -- added
import requests
from bs4 import BeautifulSoup
# Fetch the page and re-decode it using the charset declared in its first
# <meta> tag. The URL needs an explicit scheme, otherwise requests raises
# requests.exceptions.MissingSchema.
response = requests.get("https://www.baidu.com", timeout=10)
if response:  # a Response is truthy only when the request succeeded
    soup = BeautifulSoup(response.text, 'lxml')
    try:
        # Switch to the encoding the page itself asks for. Expects a tag like
        # <meta content="text/html; charset=utf-8">; the part after "=" is
        # taken as the charset.
        meta = soup.meta.attrs
        charset = meta['content'].split("=")[1].strip()
    except (AttributeError, KeyError, IndexError):
        # No <meta> tag, no "content" attribute, or no "charset=..." part:
        # fall back to the encoding detected from the body.
        charset = response.apparent_encoding
    # Original note: requests' own default is r.encoding = 'ISO-8859-1',
    # which is why the encoding is overridden here.
    response.encoding = charset
    print(response.text)
# 参照地址: https://blog.csdn.net/chaowanghn/article/details/54889835