视频重点
练习代码
1. 如何发现异步数据
右键 - 检查 - Network - XHR - 翻页时通过监视器记录请求 - 查看网络请求的 headers、cookies 等
2. 练习代码
import requests
from bs4 import BeautifulSoup
import time
import urllib.request
# Scrape image URLs from the listing pages of a weheartit tag, then download
# every image found to local disk.

# Proxy settings passed to `requests` for the listing-page requests.
# (urllib.request.urlretrieve below does not receive this dict.)
proxy = {'http': 'http://127.0.0.1:8787', 'https': 'https://127.0.0.1:8787'}
# NOTE(review): `path` is concatenated as a raw filename prefix (no separator),
# so files are written as "D:/TS<dir><name>" rather than inside a "D:/TS/"
# folder -- confirm whether a trailing "/" was intended.
path = 'D:/TS'
urls = ['http://weheartit.com/inspirations/taylorswift?page={}'.format(i) for i in range(2)]
herfs = []

for url in urls:
    wb_data = requests.get(url, proxies=proxy, verify=False)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    imgs = soup.find_all('img', alt="Taylor Swift")
    for img in imgs:
        src = img.get("src")
        # Skip <img> tags without a usable src; a None entry would crash the
        # download loop below.
        if src:
            herfs.append(src)
    # Pause between page requests to avoid hammering the site.
    time.sleep(2)

print('一共找到{}张图片'.format(len(herfs)))

for herf in herfs:
    # Build the local name from the IMAGE url (its last two path segments).
    # The original used the leftover page `url` here, so every image was
    # written to the same file and overwrote the previous one.
    parts = herf.split('/')
    urllib.request.urlretrieve(herf, path + parts[-2] + parts[-1])

print("下载完成")