1 urlopen()
把Python官网的页面爬取下来
# urlopen(): fetch the Python home page and print its HTML.
import urllib.request

# The response holds an open network connection; the with-block closes it
# once the body has been read.
with urllib.request.urlopen('https://www.python.org') as response:
    # read() returns bytes; decode them as UTF-8 to get text.
    print(response.read().decode('utf-8'))
2 查看返回的类型
# Inspect the type of the object returned by urlopen().
import urllib.request

# The with-block closes the underlying connection when the block exits.
with urllib.request.urlopen('https://www.python.org') as response:
    print(type(response))
# Output: <class 'http.client.HTTPResponse'>
这是一个HTTPResponse类型的对象,包含的方法有:
read()
readinto()
getheader(name)
getheaders()
fileno() 等……
属性:msg
version
status
reason
debuglevel
closed
3 再来看一个例子
# Another example: read the status code and the response headers.
import urllib.request

# The with-block closes the underlying connection when the block exits.
with urllib.request.urlopen('https://www.python.org') as response:
    print(response.status)               # HTTP status code
    print(response.getheaders())         # all response headers
    print(response.getheader('Server'))  # value of the Server header
# Output:
# 200
# (the header list is too long and is omitted here)
# nginx
4 urlopen() 函数的API
#urlopen() 函数的API
urllib.request.urlopen(url, data=None, [timeout, ]*,
                       cafile=None, capath=None, cadefault=False, context=None)
# 注:cafile、capath、cadefault 自 Python 3.6 起已弃用,并在 Python 3.13 中被移除,请改用 context 参数。
5 data参数
# The data parameter: the URL-encoded form is sent as the request body.
import urllib.parse
import urllib.request

# urlopen() needs the body as bytes, so URL-encode the form and then
# encode the resulting string as UTF-8.
data = urllib.parse.urlencode({'word': 'hello'}).encode('utf-8')
# The with-block closes the underlying connection when the block exits.
with urllib.request.urlopen('http://httpbin.org/post', data=data) as response:
    print(response.read())
# (output not shown)
6 timeout参数
# The timeout parameter: if the server has still not responded after
# 1 second, a URLError exception is raised.
import urllib.request

# The with-block closes the underlying connection when the block exits.
with urllib.request.urlopen('http://httpbin.org/get', timeout=1) as response:
    print(response.read())
# (output not shown)
# Handle the timeout instead of letting the exception end the script.
import socket
import urllib.error
import urllib.request

try:
    # NOTE(review): 0.1 s is presumably chosen to be short enough to force
    # a timeout for demonstration purposes.
    with urllib.request.urlopen('http://httpbin.org/get', timeout=0.1) as response:
        print(response.read())
except urllib.error.URLError as e:
    # The underlying cause of the failure is available as e.reason.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
    else:
        # Not a timeout: re-raise rather than silently swallowing the error.
        raise
except socket.timeout:
    # A timeout that occurs while reading the body is raised directly
    # instead of being wrapped in URLError.
    print('TIME OUT')
7 Request
class urllib.request.Request(url, data=None, headers={},
origin_req_host=None, unverifiable=False, method=None)
看一个例子
传入多个参数构建请求
# Build a request from several arguments (URL, form data, headers, method).
from urllib import request, parse

url = 'http://httpbin.org/post'
headers = {
    # NOTE(review): 'MSIC' looks like a typo for 'MSIE' -- confirm before changing.
    'User-Agent':'Mozilla/4.0(compatible;MSIC 5.5;Windows NT)',
    'Host':'httpbin.org',
}
# Named form_fields rather than dict so the built-in dict type is not shadowed.
form_fields = {
    'name':'Germey',
}
# The request body must be bytes: URL-encode the form, then encode as UTF-8.
data = parse.urlencode(form_fields).encode('utf-8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
# The with-block closes the underlying connection when the block exits.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))