# Crawl images and save them to local files (爬取图片并保存本地)
import requests
from bs4 import BeautifulSoup
import urllib
def img_parse(img_url, title):
    """Fetch a GitHub file page, extract its image preview and save it locally.

    Args:
        img_url: URL of the GitHub page for one image file.
        title: Filename (relative to the working directory) used to save
            the downloaded image bytes.
    """
    print(img_url)
    # Spoof a browser User-Agent so GitHub serves the regular HTML page.
    req_header = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    # Fixes vs original: a timeout so a stalled connection cannot hang the
    # script forever, and an explicit status check so we never try to parse
    # an HTTP error page as if it were the file page.
    response = requests.get(img_url, headers=req_header, timeout=10)
    response.raise_for_status()
    result = BeautifulSoup(response.text, 'lxml')
    # Original used [0] unconditionally, which raises IndexError when the
    # selector matches nothing (e.g. page layout changed). Skip gracefully.
    matches = result.select('.border-wrap img')
    if not matches:
        print('no image found on page: {}'.format(img_url))
        return
    # The <img src> is a root-relative path; prepend the GitHub origin.
    c = 'https://github.com{}'.format(matches[0]['src'])
    response1 = requests.get(c, headers=req_header, timeout=10)
    response1.raise_for_status()
    # Write the raw image bytes to a local file named after the repo entry.
    with open('{}'.format(title), 'wb') as file:
        file.write(response1.content)
def img_cover():
    """Crawl a GitHub directory listing and download every image file in it.

    Scrapes the file table of the target repository directory, builds the
    absolute URL for each entry, and delegates to ``img_parse`` to download
    and save each image.
    """
    url = 'https://github.com/phoenixshow/FrontCode/tree/master/buickmall/img'
    # Spoof a browser User-Agent so GitHub serves the regular HTML page.
    req_header = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    # Fixes vs original: request timeout + explicit HTTP status check.
    response = requests.get(url, headers=req_header, timeout=10)
    response.raise_for_status()
    result = BeautifulSoup(response.text, 'lxml')
    # Each <a> in the file table links to one file's detail page.
    for link in result.select('tbody tr .content span a'):
        # The href is root-relative; prepend the GitHub origin.
        img_url = 'https://github.com{}'.format(link['href'])
        # Link text is the file name; reused as the local save name.
        title = link.get_text()
        # Delegate per-file download/save to the parser helper.
        img_parse(img_url, title)
# Run the crawler only when executed as a script, not on import.
if __name__ == '__main__':
    img_cover()