1,基础知识
- 利用 pymongo 库连接 Python 与 MongoDB 数据库
import pymongo
# Connect to the MongoDB server running on localhost, port 27017.
client = pymongo.MongoClient(host='localhost', port=27017)
# The database (the notes liken it to a spreadsheet file).
walden = client.get_database('walden')
# A collection inside that database (likened to a sheet in the file).
sheet_lines = walden.get_collection('sheet_tag')
使用find()函数查询并展示数据库中的数据,查询条件中可以使用以下比较操作符:
$lt, $lte, $gt, $gte, $ne
分别对应
<, <=, >, >=, !=
其中 l = less, g = greater, t = than, e = equal, n = not
2,practice
爬取小猪租房中前三页的房源信息,并筛选出价格不低于500RMB(即 >= 500)的房源
The Code:
import pymongo, requests, time
from bs4 import BeautifulSoup
# Connect to the MongoDB server running on localhost, port 27017.
client = pymongo.MongoClient(host='localhost', port=27017)
# The database and the collection inside it share the name '2_1homework'.
walden = client.get_database('2_1homework')
sheet_lines = walden.get_collection('2_1homework')
# Search-result URLs for pages 1 through 3.
urls = list(map('http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format, range(1, 4)))
def get_details(url, data=None):
    """Download one search-result page and store its listings in ``sheet_lines``.

    Every listing found on the page is inserted as one document with the keys
    ``index`` (position on the page, starting at 0), ``title`` (text of the
    title element) and ``price`` (text of the price element as a float).

    Args:
        url: Address of the search-result page to download.
        data: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If a price element's text cannot be converted to float.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    titles = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
    prices = soup.select('#page_list > ul > li > div > span.result_price > i')
    # zip() pairs each title with its price and stops at the shorter list, so
    # a page where the two selectors match unequal counts no longer raises
    # IndexError part-way through.
    for index, (title_tag, price_tag) in enumerate(zip(titles, prices)):
        record = {
            'index': index,
            'title': title_tag.get_text(),
            'price': float(price_tag.get_text()),
        }
        sheet_lines.insert_one(record)
def find_price(url, data=None):
    """Print the title of every stored listing whose price is at least 500.

    The query runs against the module-level ``sheet_lines`` collection.
    Neither ``url`` nor ``data`` is read by this function.
    """
    price_filter = {'price': {'$gte': 500}}
    matches = sheet_lines.find(price_filter)
    for listing in matches:
        print(listing['title'])
# Scrape every page, pausing 2 seconds after each request.
for url_single in urls:
    get_details(url_single)
    time.sleep(2)
# find_price() ignores its url argument and queries the whole collection, so
# it runs once after all pages are stored; calling it after every page would
# print the matches from earlier pages again.
find_price(url_single)
3, 总结与反思
需要注意的几点:
- 如何将数据插入数据库
- 字典的创建
Practice makes perfect!