import time

import pymongo
import requests
from bs4 import BeautifulSoup

# MongoDB running on localhost:27017; scraped listings are stored in the
# 'sheet_lines' collection of the 'duanzu' database.
client = pymongo.MongoClient('localhost', 27017)
duanzu = client['duanzu']
sheet_lines = duanzu['sheet_lines']

# Search-result pages to crawl: range(1, 3, 1) yields page numbers 1 and 2.
url = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/?startDate=2016-06-19&endDate=2016-06-19'.format(i)
    for i in range(1, 3, 1)
]
# Not referenced by the code visible here; kept in case other code relies on it.
lianjie1 = []
def sexss(valuse):
    """Translate avatar badge elements into gender labels.

    Args:
        valuse: Iterable of elements exposing ``get('class')`` (for example
            BeautifulSoup tags); the first entry of the returned class list
            decides the label.

    Returns:
        A list with one label per element, in input order: '女' when the
        first class is ``member_ico1``, '男' when it is ``member_ico``, and
        '性别未知' for anything else, including elements whose class
        attribute is missing or empty.
    """
    labels = {'member_ico1': '女', 'member_ico': '男'}
    result = []
    for element in valuse:
        classes = element.get('class')
        # get() yields None when the attribute is absent; indexing that (or
        # an empty list) would raise, so treat both as "unknown".
        first_class = classes[0] if classes else None
        result.append(labels.get(first_class, '性别未知'))
    return result
def lian(url1):
    """Crawl one search-result page and store every listing it links to.

    Downloads ``url1``, collects the elements whose style is
    ``cursor:pointer`` and, for each one carrying a ``detailurl`` attribute,
    downloads that detail page, extracts the listing fields, inserts one
    document per listing into ``sheet_lines`` and prints it.

    Args:
        url1: URL of a search-result page.

    Returns:
        None. Results go to the MongoDB collection and to stdout.

    Raises:
        ValueError: If a price element's text is not an integer literal.
    """
    listing_page = requests.get(url1)
    listing_soup = BeautifulSoup(listing_page.text, 'lxml')

    for link in listing_soup.find_all(style='cursor:pointer'):
        detail_url = link.get('detailurl')
        if not detail_url:
            # get() returns None when the attribute is missing, and
            # requests.get(None) would raise; skip such elements.
            continue

        time.sleep(1)  # pause before each detail-page request
        detail_page = requests.get(detail_url)
        soup = BeautifulSoup(detail_page.text, 'lxml')

        titles = soup.select(' h4 > em')
        addresss = soup.select('p > span.pr5')
        prices = soup.select('div.day_l > span')
        images = soup.find_all(id='curBigImage')
        imagespeople = soup.select('div.member_pic > a > img')
        # Convert the badge elements to labels up front so that each record
        # receives its own label instead of the whole list.
        sexs = sexss(soup.select('div.member_pic > div'))
        name_oweners = soup.select('div.w_240 > h6 > a')

        info = []
        rows = zip(titles, addresss, prices, images, imagespeople, sexs, name_oweners)
        for title, address, price, image, imagepeople, sex, name_owener in rows:
            data = {
                'title': title.get_text(),
                'address': address.get_text(),
                'price': int(price.get_text()),
                'image': image.get('src'),
                'imagepeople': imagepeople.get('src'),
                'sex': sex,
                'name_owener': name_owener.get_text(),
            }
            info.append(data)
            sheet_lines.insert_one(data)

        # Echo what was stored for this detail page.
        for row in info:
            print(row['title'], row['address'], str(row['price']) + '¥', row['image'], row['imagepeople'], row['sex'], row['name_owener'])
# Crawl every search-result page. lian() has no return statement, so
# countent is always None; the assignment is kept only to preserve the
# module-level name.
for i in url:
    countent = lian(i)

# Print every stored listing whose price is at least 500.
for item in sheet_lines.find({'price': {'$gte': 500}}):
    print(item)
MongoDB第一段代码,xiaozhu短租数据
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 编译环境:python v3.5.0, mac osx 10.11.4 python爬虫基础知识: Python...
- Date: 2016-9-21 update: 2016-9-30 By: Black Crow 前言: 终于进入到网络页面...