# 1.掌握python常用数据类型和语法
# 列表的排序
# li = []
# for i in range(10):
# li.append(i)
# print(li)
# from random import shuffle
# shuffle(li)
# print('随机打乱的列表', li)
# li.sort(reverse=True)
# print('排序后的列表', li)
# Sorting a list of dicts by one of their fields using a key function.
stu_info = [
    {"name": 'zhangsan', "age": 18},
    {"name": 'lisi', "age": 30},
    {"name": 'wangwu', "age": 99},
    {"name": 'tiaqi', "age": 3},
]
print('排序前', stu_info)


def sort_by_age(record):
    """Key function for sort(): extract the 'age' field of one student dict."""
    return record['age']


# key= tells sort() which value to compare; reverse=True puts the oldest first.
stu_info.sort(key=sort_by_age, reverse=True)
print('排序后', stu_info)
# 练习
# Exercise: sort the (name, salary) tuples ascending by the second element.
name_info_list = [
    ('张三',4500),
    ('李四',9900),
    ('王五',2000),
    ('赵六',5500),
]
# The exercise stated in the original comment was left unimplemented;
# sort in place by the salary (tuple index 1), ascending.
name_info_list.sort(key=lambda pair: pair[1])
# -*- coding: utf-8 -*-
# @Time : 2019/7/31 11:00
# @Author : Eric Lee
# @Email : li.yan_li@neusoft.com
# @File : demo14.py
# @Software: PyCharm
# requests
# 导入
import requests
# url = 'https://www.baidu.com'
# url = 'https://www.taobao.com/'
# url = 'http://www.dangdang.com/'
# response = requests.get(url)
# print(response)
# 获取str类型的响应
# print(response.text)
#获取bytes类型的响应
# print(response.content)
#获取响应头
#print(response.headers)
# 获取状态码
# print(response.status_code)
# print(response.encoding)
# 200 ok 404 500
#返回200成功,404未找到,500服务器错误
# 没有添加请求头的知乎网站
# resp = requests.get('https://www.zhihu.com/')
# print(resp.status_code)
# 使用字典定义请求头
# zhihu.com rejects requests without a browser-like User-Agent header,
# so supply one explicitly via a headers dict.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
}
resp = requests.get('https://www.zhihu.com/', headers=headers)
# With the header present this should report 200 instead of a 4xx.
print(resp.status_code)
# -*- coding: utf-8 -*-
# @Time : 2019/7/31 11:28
# @Author : Eric Lee
# @Email : li.yan_li@neusoft.com
# @File : spider_dangdang.py
# @Software: PyCharm
# Fixed paste-mangled import statements (missing spaces around 'import'/'as'
# made these lines SyntaxErrors: 'from lxmlimport html', 'import pandasas pd',
# 'from matplotlibimport pyplotas plt').
import requests
from lxml import html
import pandas as pd
from matplotlib import pyplot as plt

# Configure matplotlib so Chinese labels (SimHei font) and minus signs render.
plt.rcParams["font.sans-serif"] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
def spider_dangdang(isbn):
    """Scrape dangdang.com search results for one ISBN.

    Prints every seller's listing sorted by price (cheapest first),
    plots the up-to-10 cheapest sellers as a horizontal bar chart,
    and saves the full result set to 'dangdang.csv'.

    Fixes applied to the original: the def name was corrupted to
    'spider_dangd///ng' (a SyntaxError; the call site below already used
    'spider_dangdang'), several 'for X in' tokens were fused ('for liin',
    'for bookin', 'for iin', 'for xin'), body indentation was lost, and
    taking the top 10 via range(10) raised IndexError when fewer than
    10 sellers were found.

    :param isbn: ISBN string to search for, e.g. '9787115428028'.
    """
    book_list = []
    # Target search URL for the given ISBN.
    url = 'http://search.dangdang.com/?key={}&act=input'.format(isbn)
    # Browser-like User-Agent so the site serves the normal search page.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"}
    resp = requests.get(url, headers=headers)
    html_data = resp.text
    # Parse the page; each <li> under the result <ul> is one seller's listing.
    selector = html.fromstring(html_data)
    ul_list = selector.xpath('//div[@id="search_nature_rg"]/ul/li')
    print('您好,共有{}家店铺售卖此图书'.format(len(ul_list)))
    for li in ul_list:
        # Book title.
        title = li.xpath('./a/@title')[0].strip()
        # Purchase link.
        link = li.xpath('a/@href')[0]
        # Price: strip the leading ¥ sign and convert to float.
        price = li.xpath('./p[@class="price"]/span[@class="search_now_price"]/text()')[0]
        price = float(price.replace('¥', ''))
        # Seller name; an empty xpath result means dangdang's own store.
        store = li.xpath('./p[@class="search_shangjia"]/a/text()')
        store = '当当自营' if len(store) == 0 else store[0]
        # Collect one record per seller.
        book_list.append({
            'title': title,
            'price': price,
            'link': link,
            'store': store
        })
    # Cheapest listings first.
    book_list.sort(key=lambda book: book['price'])
    for book in book_list:
        print(book)
    # Up to 10 cheapest sellers for the chart; slicing is safe when the
    # result set is shorter than 10 (range(10) indexing was not).
    top10_store = book_list[:10]
    store_names = [book['store'] for book in top10_store]
    print(store_names)
    prices = [book['price'] for book in top10_store]
    print(prices)
    # Horizontal bars: store name on the y-axis, price on the x-axis.
    plt.barh(store_names, prices)
    plt.show()
    # Persist the full result set as CSV.
    df = pd.DataFrame(book_list)
    df.to_csv('dangdang.csv')


spider_dangdang('9787115428028')