1. MongoDB 简介
MongoDB -- document database
-- not .pdf or .doc/.docx
-- is associative array
-- document == json object
-- document == php array
-- document == python dict
-- document == ruby hash
你可以在这个官方网站了解 JSON 数据格式。
4. 为何使用Mongodb
-- flexible schema
-- oriented toward programmers
-- flexible deployment
-- designed for big data
-- aggregation framework
你可以从官方 MongoDB 页面下载安装 MongoDB。你还可以阅读具体的 MongoDB 安装说明。
对于本课程中的大多数练习,你无需在计算机上安装 MongoDB,但是要想获得最好的学习体验,我们建议你这么做。安装既快捷又简单!
MongoDB 有大量驱动程序和客户端库。我们将在本课程中使用的是 PyMongo。请查阅官方文档,以了解 PyMongo 安装说明。
5 预先了解MongoDB
安装 pymongo 以便在本地运行此代码:
pip install pymongo
def add_city(db):
    """Insert the document ``{"name": "Chicago"}`` into the ``cities`` collection.

    Args:
        db: database handle exposing a ``cities`` collection.
    """
    db.cities.insert({"name" : "Chicago"})
def get_city(db):
    """Return the result of ``find_one()`` on the ``cities`` collection.

    Args:
        db: database handle exposing a ``cities`` collection.

    Returns:
        Whatever ``db.cities.find_one()`` returns.
    """
    return db.cities.find_one()
def get_db():
    """Connect to MongoDB on ``localhost:27017`` and return the ``examples`` database.

    Returns:
        The ``examples`` attribute of a ``MongoClient`` for ``localhost:27017``.
    """
    # For local use
    from pymongo import MongoClient
    client = MongoClient('localhost:27017')
    # 'examples' here is the database name. It will be created if it does not exist.
    db = client.examples
    return db
if __name__ == "__main__":
    # For local use
    # db = get_db() # uncomment this line if you want to run this locally
    # NOTE(review): while the line above stays commented out, `db` is not
    # defined in this snippet -- presumably the course's online environment
    # supplies it; confirm before running locally.
    add_city(db)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(get_city(db))
8.PyMongo简介
# Example document describing one car model; shows scalar values, arrays,
# a nested document ("designer") and an array of nested documents ("assembly").
tesla_s = dict([
    ("manufacturer", "Tesla Motors"),
    ("class", "full-size"),
    ("body style", "5-door liftback"),
    ("production", [2012, 2013]),
    ("model years", [2013]),
    ("layout", ["Rear-motor", "rear-wheel drive"]),
    ("designer", {"firstname": "Franz", "surname": "von Holzhausen"}),
    ("assembly", [
        {"country": "United State", "city": "Fremont", "state": "california"},
        {"country": "the netherlands", "city": "tilburg"},
    ]),
])
from pymongo import MongoClient
import pprint
# Create the client object from a connection string.
client = MongoClient('mongodb://localhost:27017/')
# Select the "examples" database.
db = client.examples
# Insert the `tesla_s` document (defined earlier in this file) into the
# "autos" collection of the "examples" database. The document is no longer
# reset to {} first, which would have inserted an empty document instead.
db.autos.insert(tesla_s)
# Iterate over what find() returns for the "autos" collection and
# pretty-print each document.
for a in db.autos.find():
    pprint.pprint(a)
MongoDB ensures that any document we insert can be uniquely identified by its _id field, and if we don't specify a value for _id, MongoDB will create one for us.
9.使用字段选择进行查询
We construct the query document, having the fields and values for those fields that we'd like to see in every document in our result set.
We're simply looping through our results and printing out each one of them.
def find():
    """Pretty-print every ``autos`` document whose manufacturer is "Toyota".

    Uses the module-level ``db`` handle and ``pprint`` module.
    """
    autos = db.autos.find({'manufacturer':'Toyota'})
    for a in autos:
        pprint.pprint(a)
示例
"""
Your task is to complete the 'porsche_query' function and in particular the query
to find all autos where the manufacturer field matches "Porsche".
"""
def porsche_query():
    """Build the query document matching autos manufactured by Porsche.

    Returns:
        dict: ``{"manufacturer": "Porsche"}``.
    """
    # Please fill in the query to find all autos manufactured by Porsche.
    query = {"manufacturer" : "Porsche"}
    return query
# Code here is for local use on your own computer.
def get_db(db_name):
    """Connect to MongoDB on ``localhost:27017`` and return the named database.

    Args:
        db_name: name of the database to look up on the client.

    Returns:
        ``client[db_name]`` for a ``MongoClient`` connected to ``localhost:27017``.
    """
    # For local use
    from pymongo import MongoClient
    client = MongoClient('localhost:27017')
    db = client[db_name]
    return db
def find_porsche(db, query):
    """Run ``query`` against the ``autos`` collection and return the result.

    Args:
        db: database handle exposing an ``autos`` collection.
        query: query document passed unchanged to ``find``.

    Returns:
        Whatever ``db.autos.find(query)`` returns.
    """
    # For local use
    return db.autos.find(query)
if __name__ == "__main__":
    # For local use
    import pprint

    db = get_db('examples')
    query = porsche_query()
    results = find_porsche(db, query)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("Printing first 3 results\n")
    # Slice the result to show only the first three matches.
    for car in results[:3]:
        pprint.pprint(car)
11.多项字段查询
def find():
    """Pretty-print ``autos`` documents matching both field conditions.

    The query requires manufacturer "Toyota" and class "mid-size car".
    Uses the module-level ``db`` handle and ``pprint`` module.
    """
    autos = db.autos.find({'manufacturer':'Toyota','class':'mid-size car'})
    for a in autos:
        pprint.pprint(a)
12.投影查询
def find():
    """Pretty-print only the ``name`` field of matching ``autos`` documents.

    The query selects manufacturer "Toyota" and class "mid-size car". The
    projection is passed as the second argument to ``find`` so that ``_id``
    is excluded and ``name`` is included. Uses the module-level ``db`` handle
    and ``pprint`` module.
    """
    query = {'manufacturer':'Toyota','class':'mid-size car'}
    projection = {'_id':0,'name':1}
    # Previously the projection was built (under a misspelled name) but never
    # handed to find(), so whole documents were returned.
    autos = db.autos.find(query, projection)
    for a in autos:
        pprint.pprint(a)
13.将数据导入 MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client.examples

# Count the documents in "myautos" before inserting.
num_autos = db.myautos.find().count()
# %-formatting reproduces the Python 2 `print a, b` output on Python 2 and 3.
print("num_autos before: %s" % num_autos)

# NOTE(review): `autos` is not defined in this snippet -- presumably the
# iterable of cleaned documents produced earlier; confirm against the caller.
for a in autos:
    db.myautos.insert(a)

# Count again to show how many documents were added.
num_autos = db.myautos.find().count()
print("num_autos after %s" % num_autos)
14 插入多个文档
"""
Add a single line of code to the insert_autos function that will insert the
automobile data into the 'autos' collection. The data variable that is
returned from the process_file function is a list of dictionaries, as in the
example in the previous video.
"""
from autos import process_file
def insert_autos(infile, db):
    """Load automobile data from ``infile`` and insert it into ``db.autos``.

    Args:
        infile: passed unchanged to ``process_file``.
        db: database handle exposing an ``autos`` collection.
    """
    data = process_file(infile)
    # Add your code here. Insert the data in one command.
    db.autos.insert(data)
if __name__ == "__main__":
    # Code here is for local use on your own computer.
    from pymongo import MongoClient

    client = MongoClient("mongodb://localhost:27017")
    db = client.examples
    insert_autos('autos-small.csv', db)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(db.autos.find_one())
15.使用mongoimport
将所有文档输出成JSON文档
实际两个步骤:
1.数据清理
2.将数据导入MongoDB
查看帮助文档:
mongoimport --help
mongoimport -d examples -c myautos2 --file autos.json
-d examples 指定数据库
-c myautos2 指定存储数据的集合
--file autos.json 指定实际导入的文件名,该文件位于examples文件夹内
16.运算符
不等式运算符
$gt (>)
$lt(<)
$gte(≥)
$lte(≤)
$ne(≠)
def find():
    """Pretty-print cities with population greater than 250000, then the count.

    Uses the module-level ``db`` handle and ``pprint`` module.
    """
    # Field name corrected from the misspelled 'polulation'.
    query = {'population': {'$gt': 250000}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Pretty-print cities with 250000 < population <= 500000, then the count.

    Uses the module-level ``db`` handle and ``pprint`` module.
    """
    # The query was previously bound to the misspelled name `quety`, so
    # find(query) did not receive it. The field name is also corrected from
    # the misspelled 'polulation'.
    query = {'population': {'$gt': 250000, '$lte': 500000}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Pretty-print cities whose name is >= 'X' and < 'Y', then the count.

    The range operators are applied to string values here. Uses the
    module-level ``db`` handle and ``pprint`` module.
    """
    # The query was previously bound to the misspelled name `quety`, so
    # find(query) did not receive it.
    query = {'name': {'$gte': 'X', '$lt': 'Y'}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Pretty-print cities whose foundingDate falls in 1837, then the count.

    The range runs from datetime(1837, 1, 1) to datetime(1837, 12, 31),
    both inclusive. Uses the module-level ``db`` handle and ``pprint`` module.
    """
    # Imported locally because this snippet appears before the file's
    # `from datetime import datetime` line.
    from datetime import datetime

    # The query was previously bound to the misspelled name `quety`, so
    # find(query) did not receive it.
    query = {'foundingDate': {'$gte': datetime(1837, 1, 1),
                              '$lte': datetime(1837, 12, 31)}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Pretty-print cities whose country is not 'United States', then the count.

    Uses the module-level ``db`` handle and ``pprint`` module.
    """
    # The query was previously bound to the misspelled name `quety`, so
    # find(query) did not receive it.
    query = {'country': {'$ne': 'United States'}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
"""
Your task is to write a query that will return all cities
that are founded in 21st century.
Please modify only 'range_query' function, as only that will be taken into account.
"""
from datetime import datetime
def range_query():
    """Build the query matching cities with foundingDate on or after 2001-01-01.

    Returns:
        dict: ``{"foundingDate": {"$gte": datetime(2001, 1, 1)}}``.
    """
    # Modify the below line with your query.
    # You can use datetime(year, month, day) to specify date in the query
    query = {"foundingDate":{"$gte":datetime(2001,1,1)}}
    return query
# Do not edit code below this line in the online code editor.
# Code here is for local use on your own computer.
def get_db():
    """Return the ``examples`` database from a client on ``localhost:27017``."""
    from pymongo import MongoClient
    client = MongoClient('localhost:27017')
    db = client.examples
    return db
if __name__ == "__main__":
    # For local use
    import pprint

    db = get_db()
    query = range_query()
    cities = db.cities.find(query)
    # %-formatting reproduces the Python 2 `print a, b` output on Python 2 and 3.
    print("Found cities: %s" % cities.count())
    # Show the first matching document.
    pprint.pprint(cities[0])
19.存在(exists运算符)
要在本地启动 mongo shell,请在终端中输入以下命令:
mongo
>use examples
switched to db examples
>db.cities.find() #将返回所有结果
$exists运算符允许我们基于文档是否包含特定字段来检索文档
db.cities.find({"governmentType":{"$exists":1}}).count() #{"$exists":1}表示存在 count()表示对查询结果计数
db.cities.find({"governmentType":{"$exists":0}}).pretty() #{"$exists":0}表示不存在 pretty()表示以易读的格式显示查询结果
20.正则运算符($regex)
MongoDB支持使用$regex查询字符串模式
$regex
--based on a regular expression library, specifically PCRE (the Perl Compatible Regular Expressions library)
--allow us to do regular expression queries in MongoDB
db.cities.find({"motto":{"$regex":"friendship"}}).pretty()
Without $regex (i.e. {"motto":"friendship"}), the query would match only documents where "friendship" is the entire string of the motto; with $regex it matches any motto that contains "friendship".
db.cities.find({"motto":{"$regex":"[Ff]riendship"}}).pretty()
查找座右铭中包含“friendship”一词的所有文档,其中friendship的f可以大写,也可以小写
db.cities.find({"motto":{"$regex":"[Ff]riendship|[Pp]ride"}}).pretty()
该正则表达式将匹配motto包含词语friendship或者pride的所有文档,两个词语的首字母都可以大写或者小写
21. 使用标量查询
db.autos.find({"modelYears":1980}).pretty()
modelYears字段对应的值是数组
23. 使用$in 运算符查询
$in 运算符允许我们指定数组值
db.autos.find({"modelYears":{"$in":[1965,1966,1967]}}).count()
本查询将检索modelYears字段中包含数组[1965,1966,1967]中任意一个值的文档
示例
def in_query():
    """Build the query for Ford autos assembled in one of three countries.

    Returns:
        dict: a query requiring manufacturer "Ford Motor Company" and an
        ``assembly`` value in ["Germany", "Japan", "United Kingdom"] via ``$in``.
    """
    # Modify the below line with your query; try to use the $in operator.
    query = {"manufacturer":"Ford Motor Company","assembly":{"$in":["Germany","Japan","United Kingdom"]}}
    return query
# Do not edit code below this line in the online code editor.
# Code here is for local use on your own computer.
def get_db():
    """Return the ``examples`` database from a client on ``localhost:27017``."""
    from pymongo import MongoClient
    client = MongoClient('localhost:27017')
    db = client.examples
    return db
if __name__ == "__main__":
    import pprint

    db = get_db()
    query = in_query()
    # The second argument is a projection: include name, manufacturer and
    # assembly, exclude _id.
    autos = db.autos.find(query, {"name":1, "manufacturer":1, "assembly": 1, "_id":0})
    # %-formatting reproduces the Python 2 `print a, b` output on Python 2 and 3.
    print("Found autos: %s" % autos.count())
    for a in autos:
        pprint.pprint(a)
24 使用$all 运算符查询
$all 运算符将检索字段(数组)中包含所有指定值的文档
db.autos.find({"modelYears":{"$all":[1965,1966,1967]}})
25.点表示法
query for values inside nested documents
db.tweets.find().pretty()
#!/usr/bin/env python
"""
Your task is to write a query that will return all cars with width dimension
greater than 2.5. Please modify only the 'dot_query' function, as only that
will be taken into account.
Your code will be run against a MongoDB instance that we have provided.
If you want to run this code locally on your machine, you will need to install
MongoDB, download and insert the dataset. For instructions related to MongoDB
setup and datasets, please see the Course Materials.
"""
def dot_query():
    """Build the query matching documents whose ``dimensions.width`` exceeds 2.5.

    Returns:
        dict: ``{"dimensions.width": {"$gt": 2.5}}``; the dotted key addresses
        the ``width`` field inside the nested ``dimensions`` document.
    """
    # Edit the line below with your query - try to use dot notation.
    # You can check out example_auto.txt for an example of the document
    # structure in the collection.
    query = {"dimensions.width":{"$gt":2.5}}
    return query
# Do not edit code below this line in the online code editor.
# Code here is for local use on your own computer.
def get_db():
    """Return the ``examples`` database from a client on ``localhost:27017``."""
    from pymongo import MongoClient
    client = MongoClient('localhost:27017')
    db = client.examples
    return db
if __name__ == "__main__":
    import pprint

    db = get_db()
    query = dot_query()
    cars = db.cars.find(query)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("Printing first 3 results\n")
    # Slice the result to show only the first three matches.
    for car in cars[:3]:
        pprint.pprint(car)
26. 更新
对集合中现有文档进行修改
save()
def main():
    """Add ``isoCountryCode`` = 'DEU' to one city document and save it back.

    Looks up a document with name "munchen" and country "Germany" via
    ``find_one``, sets the field on the returned document, and passes it to
    ``save``. Uses the module-level ``db`` handle.
    """
    # find_one returns the first document it finds.
    city = db.cities.find_one({"name": "munchen",
                               "country": "Germany"})
    city['isoCountryCode'] = 'DEU'
    db.cities.save(city)
save()
a method on collections objects
调用save()时,将更新本文档以包括该字段
27 设置与取消设置 $set & $unset
update()将查询文档作为第一个参数,将更新文档作为第二个参数
by default,update operates on just one document
$set
def find():
    """Set ``isoCountryCode`` to "DEU" on a matching city with ``$set``.

    The first argument to ``update`` is the query document (name "munchen",
    country "Germany"); the second is the update document. Uses the
    module-level ``db`` handle.
    """
    db.cities.update(
        {"name": "munchen", "country": "Germany"},
        {"$set": {"isoCountryCode": "DEU"}},
    )
$set的语义是:找到匹配的文档后,
如果该文档不包含这里指定的字段,那么字段添加该值
如果该文档已包含这里指定的字段,那么该字段更新为提供的值
$unset
def find():
    """Remove ``isoCountryCode`` from a matching city with ``$unset``.

    The query document selects name "munchen" and country "Germany". The
    update document uses ``$unset``; the value given for the field ("") is
    ignored. Uses the module-level ``db`` handle.
    """
    # This example previously used "$set", which would have stored an empty
    # string instead of removing the field.
    db.cities.update(
        {"name": "munchen", "country": "Germany"},
        {"$unset": {"isoCountryCode": ""}},
    )
$unset的语义是:找到匹配的文档后,无论什么文档与该查询匹配
如果有这里指定的字段,删除该字段,忽略该值
如果文档没有这里指定的字段,那么该调用无效
28. 多项更新
def find():
    """Set ``isoCountryCode`` to "DEU" on every city whose country is "Germany".

    ``multi=True`` is passed so the update is not limited to a single
    document. Uses the module-level ``db`` handle.
    """
    db.cities.update(
        {"country": "Germany"},
        {"$set": {"isoCountryCode": "DEU"}},
        multi=True,
    )
By default, update will modify just the first document it finds.
In order to modify all documents that match the query, we need to specify
multi=True.
29 删除文档
> use examples
switched to db examples
>db.cities.find() #返回集合中的所有文档
>db.cities.remove() #删除该集合的所有数据
>db.cities.drop() #删除集合以及与其相关的任何元数据,比如索引
>db.cities.remove({"name":"Chicago"}) #删除集合中与chicago相关的所有文档
>db.cities.remove({"name":{"$exists":0}}) #删除集合中所有name字段不存在的文档