#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import shutil
from urllib.parse import urljoin

import requests
from lxml import etree
# Configure the root logger so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)

# Address of the page that is fetched and parsed below.
url = "https://www.baidu.com"

# HTTP headers sent with the page request; only a desktop-browser
# User-Agent string is set.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
}
# Fetch the page at `url`, locate the logo <img> with XPath and save the
# image to logo1.png in the current working directory.

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

try:
    # The page request is made inside the try block so that a connection
    # failure here reaches the handler at the bottom.
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    if res.status_code == 200:
        # Parse the HTML source into an element tree.
        selector = etree.HTML(res.text)
        # Serialise the tree back to markup so the parsed document can be
        # inspected in the log.
        result = etree.tostring(selector)
        logging.info(result)  # log the page content
        # Extract the src attribute of the logo image with XPath.
        img_urls = selector.xpath("//*[@id='lg']/img[1]/@src")
        print(img_urls)
        if img_urls:
            # urljoin handles absolute URLs, protocol-relative ones
            # ("//host/logo.png") and path-relative ones ("/img/logo.png"),
            # resolving the latter two against the page URL.
            img_url = urljoin(url, img_urls[0])
            logging.info(img_url)
            filename = "logo1.png"
            # Stream the image; the with-block releases the connection when
            # the download is finished or fails.
            with requests.get(img_url, stream=True, timeout=REQUEST_TIMEOUT) as img_res:
                if img_res.status_code == 200:
                    # Remove any previous copy only once the server has
                    # answered successfully, so a failed download does not
                    # destroy the existing file.
                    if os.path.isfile(filename):
                        os.remove(filename)
                    with open(filename, "wb") as out_file:
                        # iter_content applies gzip/deflate decoding, which
                        # reading the raw socket stream would skip.
                        for chunk in img_res.iter_content(chunk_size=8192):
                            out_file.write(chunk)
                else:
                    logging.warning("图片下载失败: HTTP %s", img_res.status_code)
        else:
            logging.info("查找元素失败")
    else:
        print("网页异常")
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    # requests raises its own ConnectionError class, which does not derive
    # from the builtin ConnectionError, so the requests exceptions must be
    # named explicitly. Timeout is included because the requests above now
    # carry a timeout.
    print("连接异常")