# Scrape the posts shown in the Weibo "friends circle" feed.
import requests
import json
import re
# Mobile-browser User-Agent sent with every request.
_USER_AGENT = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36'

# Cookie pairs, joined with '; ' into a single Cookie header value.
# NOTE(review): this is a session cookie embedded in source — consider
# loading it from the environment or a config file instead.
_COOKIE_PAIRS = (
    'SSOLoginState=1560404105',
    'ALF=1562996105',
    'SCF=AuVhQEh4SshgI_tE32fQGS7ByXPRtNvPQait3IiKkoX7agpW_nl3m7DwIwUJIFDlutumzYmgjiBY-djZ54vQ0tM.',
    'SUB=_2A25wBZDZDeRhGeRL41YY8inJyTmIHXVTCTCRrDV6PUNbktAKLUnYkW1NUvHUPIe9y7-kszq23AaY_NsLWBJnMqo0',
    'SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhlkSzPL69hk93TYFWZeAVx5JpX5KMhUgL.Fozf1hB4eoMfeo-2dJLoIc.LxKqL1hnL1K2LxKqL1KnLB-qLxKqLB-BLBKeLxK-L1-eLBKnLxK-L1K5LB-eLxKnLB-qL1hBLxK-L1h.LBKMLxKML1-2L1hBLxK-LBo5L12qLxKBLB.zL1K.LxK-LBK-LBoSKUgf_9Pzt',
    'SUHB=0rr6WLVjpfF9wa',
    'MLOGIN=1',
    '_T_WM=91003095608',
    'WEIBOCN_FROM=1110005030',
    'M_WEIBOCN_PARAMS=luicode%3D20000174%26uicode%3D20000174',
    'XSRF-TOKEN=bc06bd',
)

# HTTP headers passed to requests.get() by get_info().
headers = {
    'User-Agent': _USER_AGENT,
    'cookie': '; '.join(_COOKIE_PAIRS),
}

# URL of the first page of the feed.
url = 'https://m.weibo.cn/feed/circle?'
# Characters stripped from each post before printing: ASCII letters, digits
# and the punctuation listed in the class (markup/URL residue in the text).
_MARKUP_NOISE = re.compile(r'[a-zA-Z0-9<="-":;>//../////_-]+')


def get_info(url, page, max_page=18):
    """Fetch feed pages starting at *url* and print each status text.

    Each response is parsed as JSON and printed in full, then the ``text``
    of every entry in ``data.statuses`` is printed with the characters
    matched by ``_MARKUP_NOISE`` removed. The next page is requested with
    ``max_id`` set to the response's ``data.next_cursor``.

    Args:
        url: Address of the first page to request.
        page: Counter value assigned to the first request.
        max_page: Highest counter value that is still fetched. The first
            request is always made, even if ``page`` already exceeds it.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the payload lacks ``data``, ``statuses`` or ``text``.
    """
    # Iterate instead of recursing: same request sequence, no growing stack.
    while True:
        # Uses the module-level ``headers``; the timeout keeps a stalled
        # connection from blocking the script forever.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
        json_data = json.loads(res.text)
        print(json_data)

        data = json_data['data']
        for status in data['statuses']:
            # No count argument: every match is removed. (No flags are
            # needed either; the pattern is a single character class.)
            print(_MARKUP_NOISE.sub('', status['text']))

        page += 1
        next_cursor = data.get('next_cursor')
        # Stop at the page limit, or when the response carries no cursor.
        if page > max_page or next_cursor is None:
            return
        url = 'https://m.weibo.cn/feed/circle?max_id={}'.format(next_cursor)
# Start scraping from the first feed page, with the page counter at 1.
get_info(url, page=1)