# 1、json、requests 爬虫
# 例子：豆瓣热门电视剧——通过 XHR 接口获取信息
import requests
import json
# Script: fetch the Douban TV list from its XHR/JSON endpoint, keep a few
# fields per show, print them, and save the result to douban.json.

# Browser-style User-Agent sent with the request (presumably so the site
# treats the script like a regular browser -- confirm if it is still needed).
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"}
# The percent-encoded `tag` value decodes to "热门"; the query asks for
# 20 items starting at offset 0, sorted by "recommend".
url = 'https://movie.douban.com/j/search_subjects?type=tv&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0'

# A timeout keeps an unresponsive server from hanging the script forever.
response = requests.get(url, headers=headers, timeout=10)
print(response.status_code)
# Stop with a clear HTTP error on 4xx/5xx instead of failing later with an
# unrelated JSON decoding error or KeyError.
response.raise_for_status()

# Decode the raw response bytes (bytes.decode() defaults to UTF-8).
data = response.content.decode()
# Convert the JSON string into Python objects.
dict_data = json.loads(data)

# The list of shows lives under the 'subjects' key.
tv_list = dict_data['subjects']

# Keep only title, rating, cover image and detail-page URL for each show,
# producing a list of dicts: [{...}, {...}, ...].
data_list = [
    {
        'title': tv['title'],
        'rate': tv['rate'],
        'cover': tv['cover'],
        'url': tv['url'],
    }
    for tv in tv_list
]

for tv in data_list:
    print(tv)

# Save as JSON. The `with` block guarantees the file is flushed and closed;
# ensure_ascii=False writes non-ASCII text as-is rather than \uXXXX escapes.
with open('douban.json', 'w', encoding='utf-8') as f:
    json.dump(data_list, f, ensure_ascii=False)