方式一urllib:
导入包
# Standard-library modules used by the urllib-based login flow.
# These must be real import statements: wrapped in triple quotes they are only
# a string literal and the names request / parse / cookiejar are never bound.
from urllib import request, parse
from http import cookiejar
# Form fields that have to be POSTed when logging in.
# NOTE: `email` and `password` are not defined in the visible part of this
# file; they must be set before this point.
data = {
    'email': email,
    'password': password,
}
# URL-encode the form and convert it to bytes for use as the request body.
# (The dict must be named `data`, the name this line reads.)
post_data = parse.urlencode(data).encode('utf-8')
# Request headers.
# The Referer header tells the server which page the request came from; the
# HTTP field name is spelled "Referer", so a key named 'refer' would be sent
# as an unrelated header.
headers = {
    'User-agent': 'xxx',
    'cookie': 'xxx',
    'Referer': 'xxx',
}
# Address the login form is submitted to (visible in the browser's developer tools).
# NOTE(review): the literal begins with a space, which looks like a typo — confirm.
login_url = ' http://xxxxx'

# Create the cookie jar.
cookie = cookiejar.CookieJar()
# Build an opener on top of the cookie jar.
handler = request.HTTPCookieProcessor(cookie)
opener = request.build_opener(handler)

# Build the login request from the URL, the encoded form body and the headers.
req = request.Request(login_url, data=post_data, headers=headers)
# Send the login request; from now on this opener carries the cookie that
# proves we are logged in.
resp = opener.open(req)
# Page that can only be viewed after logging in.
url = 'xxx'
# Build the request for that page. Use `request.Request`: only the `request`
# submodule is imported (`from urllib import request`), so the bare name
# `urllib` is not bound and `urllib.request.Request` would raise NameError.
req = request.Request(url, headers=headers)
# Go through the same opener that performed the login, and close the
# response once the body has been read.
with opener.open(req) as resp:
    print(resp.read().decode('utf-8'))
方式二requests:
import requests
# Page that can only be viewed after logging in.
url = 'xxxx'

# Cookie string copied from the browser after logging in.
cookie_str = r'JSESSIONID=xxxxxxxxxxxxxxxxxxxxxx; iPlanetDirectoryPro=xxxxxxxxxxxxxxxxxx'

# Convert the cookie string into a dict so it can be passed along with the request.
cookies = {}
for line in cookie_str.split(';'):
    # Pairs are separated by "; ", so strip the padding first; otherwise every
    # key after the first one would start with a space.
    key, sep, value = line.strip().partition('=')
    if not sep:
        # Ignore empty or malformed segments (e.g. after a trailing ';').
        continue
    cookies[key] = value
# Request headers.
headers = {'User-agent': 'xxx'}

# Send the GET request with both the headers and the cookies attached,
# then print the response body decoded as UTF-8.
resp = requests.get(url, cookies=cookies, headers=headers)
print(resp.content.decode('utf-8'))