# Judge_end.py
# Purpose: determine whether the last listing page has been reached.
import requests
from bs4 import BeautifulSoup
def judge_end(url):
    """Report whether the listing page at ``url`` still shows results.

    Downloads the page and reads the counter found at the CSS path
    ``#infocont > span > b``.

    Note that, despite the function's name, a True result means the end has
    NOT been reached: callers use ``if judge_end(url):`` to decide whether
    the page is worth scraping.

    Args:
        url: Address of the listing page to inspect.

    Returns:
        True when the counter parses as a non-zero integer; False when it is
        zero, absent from the page, or not an integer.

    Raises:
        requests.RequestException: If the page cannot be downloaded
            (including when the request times out).
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')
    counters = soup.select('#infocont > span > b')
    if not counters:
        # No counter element on the page: treat it as "nothing to read"
        # instead of failing with IndexError.
        return False

    raw_count = counters[0].get_text().strip().replace(',', '')
    try:
        # int() rather than eval(): this text comes from a remote server and
        # must never be executed as Python code.
        count = int(raw_count)
    except ValueError:
        return False
    return count != 0
'''
#infolist > div > ul > div.boxlist > ul > li:nth-child(1)
http://bj.58.com/shoujihao/pn3
http://bj.58.com/shoujihao/pn200/
'''
# Main.py
# Purpose: collect the links of all phone-number listings.
from PhoneInf import get_Phone_Href
# Listing-page addresses for page numbers 1 through 129.
urls = [
    'http://bj.58.com/shoujihao/pn{}'.format(page_no)
    for page_no in range(1, 130)
]


def main():
    """Pass every address in ``urls`` to ``get_Phone_Href``, in order."""
    for page_url in urls:
        get_Phone_Href(page_url)


# Runs the crawl as soon as this module is executed.
main()
# PhoneInf.py
# Purpose: fetch phone numbers, the link addresses of their detail pages, and related information.
from bs4 import BeautifulSoup
import requests
import time
from Judge_end import judge_end
import pymongo
# Client for the MongoDB server at localhost, port 27017.
client=pymongo.MongoClient('localhost',27017)
# Handle to the 'TC58_phone0' database on that server.
TC58_phone=client['TC58_phone0']
# Handle to the 'phoneHref' collection; get_Phone_Href inserts its records here.
phoneHref=TC58_phone['phoneHref']
def get_Phone_Href(url):
    """Save every phone number and its link found on one listing page.

    The page is skipped when ``judge_end(url)`` is falsy. Otherwise each
    ``strong`` element whose class attribute is ``number`` and that sits
    directly inside an ``a.t`` anchor of the listing is stored in the
    ``phoneHref`` collection as a document with the keys ``number`` (the
    element's text), ``href`` (the enclosing anchor's ``href`` attribute)
    and ``read`` (always the string ``'none'``).

    After the page has been stored the function pauses for two seconds and
    prints the total number of documents in the collection.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None.

    Raises:
        requests.RequestException: If the page cannot be downloaded
            (including when the request times out).
    """
    # Guard clause: nothing to do for pages that judge_end rejects.
    if not judge_end(url):
        return

    # NOTE(review): judge_end(url) has already downloaded this page once, so
    # every scraped page currently costs two HTTP requests.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')
    number_tags = soup.select(
        '#infolist > div > ul > div.boxlist > ul > li > a.t > strong[class="number"]'
    )

    for number_tag in number_tags:
        # The selector ends in "a.t > strong", so the direct parent is the
        # anchor that owns this number. Reading the href from it keeps number
        # and link paired even if some anchors carry no number element, which
        # zipping two independent selections could not guarantee.
        anchor = number_tag.parent
        phoneHref.insert_one({
            'number': number_tag.get_text(),
            'href': anchor.get('href'),
            'read': 'none',
        })

    # Pause between pages to limit the request rate.
    time.sleep(2)
    # Collection.count() was removed in PyMongo 4; count_documents({}) is the
    # supported way to count every document.
    print(phoneHref.count_documents({}))