欢迎交流,QQ:2544100193
文件下载地址
一、获取所有的二手房的网址并保存
代码:
import requests
import re
import csv
def get_allurls(page_numbers):
    """Collect second-hand listing URLs from the index pages into ``urls.csv``.

    Index pages ``2`` up to (but not including) ``page_numbers`` of
    ``http://bj.cityhouse.cn/forsale/pg{n}/`` are downloaded. Every link found
    inside an ``<h4 class="tit">`` heading is prefixed with the site root and
    written to ``urls.csv``, one URL per row under a ``url_name`` header.

    Args:
        page_numbers: Exclusive upper bound of the page range; any value
            accepted by ``int()``.

    Raises:
        requests.RequestException: If a page cannot be downloaded, including
            when the server does not answer within the timeout.
    """
    site_root = 'http://bj.cityhouse.cn'
    page_template = 'http://bj.cityhouse.cn/forsale/pg{}/'
    # Compiled once here instead of once per downloaded page.
    link_pattern = re.compile(r'<h4 class="tit"><a target="_blank" href="(.*?)"')

    listing_urls = []
    # NOTE(review): the range starts at 2, so page 1 is never fetched; kept
    # exactly as in the original - confirm whether that is intended.
    for page in range(2, int(page_numbers)):
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(page_template.format(page), timeout=30)
        response.encoding = 'utf-8'
        listing_urls.extend(
            site_root + href for href in link_pattern.findall(response.text)
        )

    # newline='' is what the csv module requires for files handed to
    # csv.writer; without it Windows output gets an empty row after each record.
    with open('urls.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['url_name'])
        writer.writerows([listing_url] for listing_url in listing_urls)
if __name__ == '__main__':
    # Script entry point: crawl the listing index with an upper page bound of 20.
    page_limit = 20
    get_allurls(page_limit)
结果:
二、爬取每个网页所需要的信息并保存到CSV文件
代码:
import requests
from bs4 import BeautifulSoup
import re
import csv
import pandas as pd
import numpy as np
def get_alldatas(max_urls=100):
    """Scrape area, unit price and district from listing pages into ``data.csv``.

    The URLs are read from the ``url_name`` column of ``urls.csv``. For every
    page, the elements with ids ``fyt_bldgarea``, ``fyt_price`` and
    ``fyt_district`` are looked up. Pages missing any of these elements, or
    whose area or price element has no single text string, are skipped. The
    collected rows are written to ``data.csv`` encoded as gb2312.

    Args:
        max_urls: Maximum number of URLs (taken from the start of the file)
            to visit. Defaults to 100, the limit the original hard-coded;
            pass ``None`` to visit every URL.

    Raises:
        requests.RequestException: If a page cannot be downloaded, including
            when the server does not answer within the timeout.
    """
    data = pd.read_csv('urls.csv')
    urls = np.array(data["url_name"])

    areas = []
    unit_prices = []
    districts = []
    for url in urls[:max_urls]:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
        soup = BeautifulSoup(response.text, "lxml")

        # Look each element up once instead of re-searching the document.
        area_tag = soup.find(id="fyt_bldgarea")
        # Progress/debug output kept from the original implementation.
        print(area_tag)
        price_tag = soup.find(id="fyt_price")
        district_tag = soup.find(id="fyt_district")
        if area_tag is None or price_tag is None or district_tag is None:
            continue

        area_text = area_tag.string
        price_text = price_tag.string
        # .string is None when the element does not hold exactly one string;
        # slicing or regex-searching None would raise TypeError.
        if area_text is None or price_text is None:
            continue

        # Drop the last character of the area text (presumably a unit suffix).
        areas.append(area_text[:-1])
        # Keep only the digits of the price text.
        # NOTE(review): this also removes any decimal point - confirm prices
        # on the site are whole numbers.
        unit_prices.append(''.join(re.findall('[0-9]', price_text)))
        districts.append(district_tag.string)

    dataframe = pd.DataFrame({"面积": areas, '每平米价格': unit_prices, '地区': districts})
    dataframe.to_csv("data.csv", index=False, encoding="gb2312")
if __name__ == "__main__":
    # Script entry point: scrape the listing pages named in urls.csv.
    get_alldatas()
结果:
三、画图对比各区平均房价
代码:
#-*- coding=utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def get_datas():
    """Group the unit prices in ``data.csv`` by district.

    Reads ``data.csv`` (gb2312 encoded) and appends each row's
    ``每平米价格`` value to the module-level list of the district named in
    its ``地区`` column. Rows whose district is not one of the nine known
    districts are ignored. Finally prints the mean unit price of the
    ``chaoyang`` list.

    The nine module-level lists (``shijingshan``, ``miyun``, ``chaoyang``,
    ``daxing``, ``fengtai``, ``fangshan``, ``dongcheng``, ``haidian``,
    ``tongzhou``) must exist before this function is called.
    """
    data = pd.read_csv('data.csv', encoding="gb2312")
    districts = np.array(data["地区"])
    prices = np.array(data["每平米价格"])

    # District name -> module-level price list; replaces the if/elif chain.
    buckets = {
        '石景山区': shijingshan,
        '密云区': miyun,
        '朝阳区': chaoyang,
        '大兴区': daxing,
        '丰台区': fengtai,
        '房山区': fangshan,
        '东城区': dongcheng,
        '海淀区': haidian,
        '通州区': tongzhou,
    }
    for district, price in zip(districts, prices):
        bucket = buckets.get(district)
        if bucket is not None:
            bucket.append(price)

    # np.mean([]) returns nan but also emits a RuntimeWarning; print nan
    # directly when no chaoyang rows were found.
    print(np.mean(chaoyang) if chaoyang else np.nan)
def huatu():
    """Draw a radar (polar) chart of the mean unit price of each district.

    Reads the nine module-level price lists (``shijingshan`` ... ``tongzhou``),
    plots the mean of each on a polar axis as a closed, filled outline, and
    shows the figure with ``plt.show()``. A district with no prices is
    plotted as ``nan``.
    """
    district_prices = [
        (u'shijingshan', shijingshan),
        (u'miyun', miyun),
        (u'chaoyang', chaoyang),
        (u'daxing', daxing),
        (u'fengtai', fengtai),
        (u'fangshan', fangshan),
        (u'dongcheng', dongcheng),
        (u'haidian', haidian),
        (u'tongzhou', tongzhou),
    ]
    labels = [name for name, _ in district_prices]
    # np.mean of an empty list is nan plus a RuntimeWarning; use nan directly.
    means = np.array([
        np.mean(prices) if len(prices) else np.nan
        for _, prices in district_prices
    ])

    # One spoke per district, evenly spaced around the circle.
    angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False)
    # Repeat the first point at the end so the outline closes.
    closed_values = np.concatenate((means, [means[0]]))
    closed_angles = np.concatenate((angles, [angles[0]]))

    fig = plt.figure()
    ax = fig.add_subplot(111, polar=True)
    ax.plot(closed_angles, closed_values, 'bo-', linewidth=2)
    ax.fill(closed_angles, closed_values, facecolor='r', alpha=0.25)
    # Label the spokes with the un-closed angles: the closed array has one
    # more entry than there are labels, and matplotlib versions that validate
    # tick/label counts reject that mismatch.
    ax.set_thetagrids(angles * 180/np.pi, labels)
    ax.set_title(u"123", va='bottom')
    ax.grid(True)
    plt.show()
if __name__ == '__main__':
    # One module-level price list per district: get_datas() appends to them
    # and huatu() reads them, so they must exist before either is called.
    (shijingshan, miyun, chaoyang,
     daxing, fengtai, fangshan,
     dongcheng, haidian, tongzhou) = ([] for _ in range(9))
    get_datas()
    huatu()
结果: