import requests
import re
# Request headers sent with every page fetch; the User-Agent is a desktop
# Chrome browser string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.2; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3325.181 Safari/537.36'
    ),
}
def parse_page(url, timeout=10):
    """Download one page and print the text of every content entry on it.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises; without a timeout a stalled connection would
            block the script indefinitely.

    Returns:
        A list with the cleaned text of each entry, in page order. Each
        entry is also printed as it is extracted.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    text = response.text
    # Capture the body of the first <span> that follows each
    # <div class="content">; re.S lets '.' match across line breaks.
    contents = re.findall(
        r'<div\sclass="content">.*?<span>(.*?)</span>', text, re.S
    )
    duanzi = []
    for content in contents:
        # Remove any tags left inside the captured span, then trim the
        # surrounding whitespace.
        entry = re.sub(r'<.*?>', '', content).strip()
        duanzi.append(entry)
        print(entry)
    return duanzi
def main():
    """Call parse_page on listing pages 1 through 10, in order."""
    url_template = 'https://www.qiushibaike.com/text/page/{}/'
    for page_number in range(1, 11):
        parse_page(url_template.format(page_number))
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
# 结果如下: