# -*- coding: UTF-8 -*-
import requests
import re
import csv
# HTTP request headers: a desktop-browser User-Agent string.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36'
    ),
}
# Five independent empty lists; each one holds the values of one CSV column.
nickname, ratedate, color, size, ratecontent = [], [], [], [], []
# Build the review-list URLs for pages 1 through 10 (inclusive).
# The page number is passed in the `currentPage` query parameter; the
# separator must be a literal '&' (an HTML-entity-mangled '&curren' would
# turn it into the currency sign and drop the parameter).
urls = [
    'https://rate.tmall.com/list_detail_rate.htm?itemId=521136254098&spuId=345965243&sellerId=2106525799&order=1&currentPage=%s' % page
    for page in range(1, 11)  # upper bound is exclusive, so 11 reaches page 10
]
# Regular expressions for the five fields pulled out of each response body.
# They are compiled once here instead of on every page iteration.
NICKNAME_RE = re.compile('"displayUserNick":"(.*?)"')
COLOR_RE = re.compile('"auctionSku":(.*?);')
SIZE_RE = re.compile('尺码:(.*?);')
RATECONTENT_RE = re.compile('"rateContent":"(.*?)","rateDate"')
RATEDATE_RE = re.compile('"rateDate":"(.*?)","reply"')

# Fetch every page and append the matches of each pattern to its column list.
for url in urls:
    # A timeout keeps one stalled connection from hanging the whole script;
    # requests raises an exception instead of waiting forever.
    content = requests.get(url, headers=header, timeout=10).text
    nickname.extend(NICKNAME_RE.findall(content))
    color.extend(COLOR_RE.findall(content))
    size.extend(SIZE_RE.findall(content))
    ratecontent.extend(RATECONTENT_RE.findall(content))
    ratedate.extend(RATEDATE_RE.findall(content))

# Sanity check: show the second scraped nickname, if at least two were found
# (an unguarded index would raise IndexError on an empty/short result).
if len(nickname) > 1:
    print(nickname[1])
# Append the scraped columns to the CSV file, one review per row.
# - mode 'a' appends to any existing file rather than overwriting it;
# - newline='' lets the csv module control line endings itself.
# The `with` block guarantees the file is closed even if writing fails.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it can represent every character in the review text.
with open('D:\\test\\tmall.csv', 'a', newline='') as out:
    csv_writer = csv.writer(out, dialect="excel")
    # Turn the five column lists into rows. zip() stops at the shortest
    # column, so a pattern that matched fewer times on some page cannot
    # trigger an IndexError halfway through the output.
    csv_writer.writerows(zip(nickname, color, size, ratecontent, ratedate))