from urllib.parse import quote  # quote lives in urllib.parse in Python 3
import requests
from bs4 import BeautifulSoup
import time

initial_content = input('Enter the name of the novel to search for: ')
keyword = quote(initial_content, encoding='gb2312')  # Biquge pages use gb2312 encoding
url = 'http://www.biquge.com.tw/modules/article/soshu.php?searchkey=' + keyword
print(url)

response = requests.get(url)  # Biquge search URL ("response" rather than "re", to avoid shadowing the re module)
response.encoding = response.apparent_encoding
print(response.status_code)
html = response.text
soup = BeautifulSoup(html, 'html.parser')

fileName = '/Users/john/Desktop/小说/' + initial_content + '.txt'  # macOS save path; change this on Windows
print(fileName)
file = open(fileName, 'a', encoding='utf-8')

chapters = soup.find_all(id='list')  # chapter list container
info = soup.find_all(id='info')      # book title and author block
for link in info:
    file.write(link.get_text())  # write the book/author info first

download_soup = BeautifulSoup(str(chapters), 'html.parser')
arr = []
for child in download_soup.dl.children:  # every child node under <dl>
    if hasattr(child, 'href') and child.a is not None:  # skip bare text nodes and entries without a link
        arr.append(child.get_text())
numbel = len(arr)  # total number of chapters
print(numbel)

index = 1
time1 = time.time()  # start time in seconds
for child in download_soup.dl.children:  # every child node under <dl>
    if hasattr(child, 'href') and child.a is not None:
        file.write(child.get_text() + '\n' + '-----------------------------------------------' + '\n')
        url = 'http://www.biquge.com.tw/' + child.a['href']
        # print(url)
        response_dl = requests.get(url)
        response_dl.encoding = response_dl.apparent_encoding
        html_dl = response_dl.text
        soup_dl = BeautifulSoup(html_dl, 'html.parser')
        contents = soup_dl.find_all(id='content')  # the <div id="content"> holding the chapter text
        for link in contents:
            # print(link.get_text())
            file.write(link.get_text() + '\n\n')
        print('Downloaded: %.3f%%' % (index / numbel * 100))  # crawl progress
        index += 1

time2 = time.time()
tt = time2 - time1
print('Time spent: ' + str(tt) + ' seconds')
file.close()
The idea is simple: take Biquge's search URL, append the book name to reach the table of contents, and then download the chapters one by one into a local text file. I'm on a Mac; on Windows you need to change the save path (a cross-platform alternative is sketched below).
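If you'd rather not edit the path by hand on each OS, the two lines that build fileName and open the file could be swapped for something like this minimal sketch. It uses pathlib to resolve the current user's home directory; keeping a 小说 folder under the desktop is just an assumption carried over from the original path, not a requirement.

from pathlib import Path

# Minimal sketch, assuming the same Desktop/小说 layout as the original script.
# initial_content comes from the input() call earlier in the script.
save_dir = Path.home() / 'Desktop' / '小说'
save_dir.mkdir(parents=True, exist_ok=True)  # create the folder if it does not exist yet
fileName = save_dir / (initial_content + '.txt')
file = open(fileName, 'a', encoding='utf-8')

open() accepts a Path object directly, so the rest of the script stays unchanged.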