from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import os
from bs4 import BeautifulSoup
# os.environ["PATH"] += os.pathsep + 'D:\google-art-downloader-master'
# Collect the image URLs shown on Huaban pin pages.
#
# Input:  huaban_pin_asserts_all.txt -- one pin path per line; each path is
#         appended to "http://huaban.com" to form the page URL.
# Output: huaban_images_all.txt -- the collected image URLs, one per line,
#         de-duplicated and sorted.
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--headless")

# The image is looked up in two places under #baidu_image_holder: nested
# inside an <a> element, or as a direct child.
image_xpaths = (
    '//*[@id="baidu_image_holder"]/a/img',
    '//*[@id="baidu_image_holder"]/img',
)

images_all = set()
# `options=` is the current keyword; `chrome_options=` is deprecated and has
# been removed from recent Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
try:
    # Original author's note: both timeouts must be set for the setting to
    # take effect. They are configured once, before the first page load, so
    # that they apply to every request.
    browser.set_page_load_timeout(10000)
    browser.set_script_timeout(10000)

    with open("huaban_pin_asserts_all.txt", 'r', encoding="utf8") as read_file:
        for index, line in enumerate(read_file):
            pin_path = line.strip()
            if not pin_path:
                # A blank line would only re-fetch the site root.
                continue

            url = "http://huaban.com" + pin_path
            browser.get(url)
            time.sleep(1)  # presumably gives the page time to render
            print(index, url)

            for xpath in image_xpaths:
                try:
                    # find_elements returns an empty list when nothing
                    # matches, so a missing image is not an error.
                    matches = browser.find_elements("xpath", xpath)
                    src = matches[0].get_attribute('src') if matches else None
                except Exception as e:
                    # Best effort: a failed lookup must not stop the crawl,
                    # but it should not disappear silently either.
                    print("lookup failed:", xpath, "on", url, "-", repr(e))
                    continue
                if src:
                    images_all.add(src)

            time.sleep(1)
except Exception as e:
    # Keep the script's best-effort behaviour, but report what went wrong.
    print("crawl aborted:", repr(e))
finally:
    # quit() ends the whole driver session on success and on failure alike;
    # close() would only close the current window.
    browser.quit()

# Written after the crawl so that partial results survive an aborted run.
with open("huaban_images_all.txt", 'w', encoding="utf8") as write_file:
    write_file.writelines(src + "\n" for src in sorted(images_all))
Fetch the full-size image URLs from Huaban.
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- url:http://huaban.com/search/?q=%E6%B0%B4%E5%A2%A8%E7%94%...