import requests
from lxml import etree  # was "from lxmlimport etree" — SyntaxError
import xlwt

# Module-level accumulators shared between get_info() and the __main__ block.
all_info_list = []   # unused at the moment; kept for backward compatibility
author_list = []     # author names, one entry per joke
stat_list = []       # vote ("好笑") counts, one entry per joke
comment_list = []    # comment counts, one entry per joke
content_list = []    # joke body text, one entry per joke
def get_info(url):
    """Scrape one qiushibaike text page and append what it finds to the
    module-level lists (author_list, stat_list, comment_list, content_list).

    :param url: page URL, e.g. 'https://www.qiushibaike.com/text/page/1/'
    :returns: None — results are accumulated in the module-level lists.
    """
    res = requests.get(url)
    html = etree.HTML(res.text)

    # Author names live in the <h2> inside each author header block.
    for info in html.xpath('//div[@class="author clearfix"]//h2'):
        author_list.append(info.text)

    # Vote count for each joke.
    for info in html.xpath('//span[@class="stats-vote"]//i[@class="number"]'):
        stat_list.append(info.text)

    # Comment count for each joke.
    for info in html.xpath('//span[@class="stats-comments"]//i[@class="number"]'):
        comment_list.append(info.text)

    # The joke body is the <span> with no attributes; spans carrying
    # attributes are other UI elements and are skipped.  The original
    # compared info.attrib (a mapping) to '' which is never true, so no
    # content was ever collected.
    for info in html.xpath('//div[@class="content"]//span'):
        if not info.attrib:
            content_list.append(info.text)
if __name__ == '__main__':
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')

    # NOTE(review): 'song'/'time' look copied from another script — the
    # columns actually hold comment counts and joke text; confirm intent
    # before renaming, since the file layout is user-facing.
    header = ['username', 'stat', 'song', 'time']
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    # Scrape pages 1..99 of the text section.
    urls = ['https://www.qiushibaike.com/text/page/{}/'.format(i)
            for i in range(1, 100)]
    for url in urls:
        get_info(url)

    # One column per accumulator, one row per scraped entry (row 0 is the
    # header).  Writing the scalar values directly — the original passed
    # whole list objects to sheet.write, which xlwt rejects.
    columns = [author_list, stat_list, comment_list, content_list]
    for col, values in enumerate(columns):
        for row, value in enumerate(values, start=1):
            sheet.write(row, col, value)

    book.save('E:/python/第三讲/test2222.xls')