I've been pretty busy with work lately and haven't had time for Python, so this afternoon I spent a bit of time on proxy IPs.
proxy = {'http':'120.26.140.95:81'} # the proxy IP; I'm only listing one here as an example
You can check your own IP by visiting http://www.whatismyip.com.tw/
So all we have to do is request that site with requests and it will tell us which IP we're coming from. Without further ado, here's the code:
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import requests

# requests maps the URL scheme ('http') to the proxy it should route through
proxy = {'http': '120.26.140.95:81'}
# A browser-like User-Agent so the request doesn't look like a bare script
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
url = 'http://www.whatismyip.com.tw/'
response = requests.get(url, proxies=proxy, headers=header)
response.encoding = 'utf-8'
print(response.text)
In the screenshot below you can see that the IP the site reports is the proxy IP I was using.
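Free proxies die quickly, so by the time you run this the sample proxy may simply hang. Here's a minimal defensive sketch (my own addition, not part of the original script), assuming a 5-second timeout and a plain error message are good enough:

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import requests

proxy = {'http': '120.26.140.95:81'}  # sample proxy; quite possibly dead by now
url = 'http://www.whatismyip.com.tw/'
try:
    # Fail fast instead of hanging forever on an unresponsive proxy
    response = requests.get(url, proxies=proxy, timeout=5)
    response.encoding = 'utf-8'
    print(response.text)
except requests.exceptions.RequestException as e:
    print('Proxy request failed: %s' % e)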
Proxy IPs can be scraped from the Xici proxy site (xicidaili), so I might as well paste that script too.
Don't hit it too frequently, though, or... well, that's how my IP got banned (see the sketch after the script for a way to slow down).
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

# The first 10 listing pages of the "nn" (high-anonymity) section
url_list = ['http://www.xicidaili.com/nn/%d' % x for x in range(1, 11)]

# Note: a dict keeps only one value per key, so writing several 'http' entries
# would silently discard all but the last; use a single proxy per scheme.
proxies = {
    'http': 'http://120.26.140.95:81',
    # spares: 117.21.234.107:8080, 122.192.74.83:8080, 117.122.240.153:8088
}
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

def get_ip():
    for url in url_list:
        response = requests.get(url=url, headers=header, proxies=proxies)
        response.encoding = 'utf-8'
        html = BeautifulSoup(response.text, 'html.parser')
        # Each proxy is one table row; the first row is the header, so skip it
        for data_ip in html.find_all('tr')[1:]:
            ip_data = data_ip.find_all('td')
            ip = ip_data[1].get_text()
            port = ip_data[2].get_text()
            print('%s:%s' % (ip, port))

if __name__ == '__main__':
    get_ip()
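Since hammering the listing pages is exactly what got me banned, here's a minimal sketch of two additions on top of get_ip() (the check_proxy helper and the use of httpbin.org/ip are my own assumptions, not anything from the original script): test each scraped proxy before keeping it, and pause for a random few seconds between pages.

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import random
import time
import requests

def check_proxy(ip, port):
    # httpbin.org/ip just echoes the caller's IP, so any successful response
    # within the timeout means the proxy is alive and actually being used
    proxy = {'http': 'http://%s:%s' % (ip, port)}
    try:
        r = requests.get('http://httpbin.org/ip', proxies=proxy, timeout=5)
        return r.ok
    except requests.exceptions.RequestException:
        return False

# Inside get_ip(), only keep proxies that pass the check:
#     if check_proxy(ip, port):
#         print('%s:%s' % (ip, port))
# and, after each listing page, sleep for a random few seconds:
#     time.sleep(random.uniform(2, 5))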
Next up, I'm planning to spend some time learning Scrapy~