from bs4 import BeautifulSoup
import requests
import time

# Search-result pages for short-term rentals in Beijing (pages 1-13)
sourceurls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 14)]
detail_urls = []  # links to individual listing pages, filled by get_detail_url()
detailku = []     # scraped listing records, filled by get_detail_info()
def get_detail_url(urls):
    # Collect the listing links from one search-result page.
    web_data = requests.get(urls)
    time.sleep(2)  # pause between requests to avoid hammering the site
    soup = BeautifulSoup(web_data.text, 'lxml')
    for url in soup.select('#page_list > ul > li > a'):
        detail_url = url.get('href')
        detail_urls.append(detail_url)
    print(detail_urls, len(detail_urls))
# sourceurls is a list, so each page URL has to be taken out and passed to the function one by one
for single_url in sourceurls:
    get_detail_url(single_url)
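This loop fires one request per results page and will stop with an exception if any single request fails or times out. As a hedged aside (not part of the original lesson code), a small wrapper such as the hypothetical fetch() below could skip bad pages instead of aborting the whole crawl; the timeout value is an assumption:

import requests

def fetch(url, timeout=10):
    # Illustrative helper: return the page HTML, or None if the request fails.
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.text
    except requests.RequestException as exc:
        print('skipping', url, exc)
        return None

Inside get_detail_url() one would then only parse the result of fetch(urls) when it is not None.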
def get_detail_info(url):
    # Scrape one listing page and append its record to detailku.
    web_data = requests.get(url)
    time.sleep(1)
    soup = BeautifulSoup(web_data.text, 'lxml')
    titles = soup.select('h4 > em')
    areas = soup.select('span.pr5')
    day_prices = soup.select('div.day_l > span')
    house_pics = soup.select('#curBigImage')
    landlord_pics = soup.select('div.member_pic > a > img')
    landlord_names = soup.select('a.lorder_name')
    # Gender is read from the landlord icon: the check below treats the presence of a
    # 'member_ico' div as a male host, otherwise female.
    if soup.find_all('div', 'member_ico'):
        landlord_gender = '男'   # male
    else:
        landlord_gender = '女'   # female
    # landlord_gender is a single string, so it must not go into zip() below:
    # zipping a string iterates over its characters and would truncate the results.
    for title, area, day_price, house_pic, landlord_pic, landlord_name in zip(
            titles, areas, day_prices, house_pics, landlord_pics, landlord_names):
        data = {
            'title': title.get_text(),
            'area': area.get_text(),
            'day_price': day_price.get_text(),
            'house_pic': house_pic.get('src'),
            'landlord_pic': landlord_pic.get('src'),
            'landlord_gender': landlord_gender,
            'landlord_name': landlord_name.get_text()
        }
        detailku.append(data)
        print(data, len(detailku))


for detail_single_url in detail_urls:
    get_detail_info(detail_single_url)
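The scraped records only live in the detailku list and on stdout. As a sketch (not part of the original code), they could be written to a CSV file once the crawl finishes; the file name, field order, and the save_records() helper are assumptions for illustration:

import csv

def save_records(records, path='xiaozhu_listings.csv'):
    # Illustrative helper: dump the scraped dicts into a CSV file.
    if not records:
        return
    fields = ['title', 'area', 'day_price', 'house_pic',
              'landlord_pic', 'landlord_gender', 'landlord_name']
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        writer.writerows(records)

save_records(detailku)

The utf-8-sig encoding is chosen so spreadsheet programs recognise the Chinese text; plain utf-8 works just as well.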
Week 1, Lesson 3