# coding=utf-8
import requests
import time
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import requests
import json
# HTTP request headers sent with every search request issued by this script.
# Header names and values are kept exactly as in the original mapping.
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
    # NOTE(review): hard-coded body length — presumably copied from a captured
    # request; confirm it matches the form body that is actually sent.
    "Content-Length": "23",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    # NOTE(review): looks like a session cookie captured from a browser (the
    # values embed 20180614 timestamps) — presumably stale; confirm before
    # reuse. Adjacent literals below concatenate into one header value.
    "Cookie": (
        "user_trace_token=20180614213205-1ed85102-96b3-46b0-97c0-e73b711c8763; "
        "JSESSIONID=ABAAABAAAGFABEF5EE54015C5797042B80918E0FC34818B; "
        "X_HTTP_TOKEN=b95527a4550f8d93d47b619adada743c; "
        "LGUID=20180614213509-c1f72ef8-6fd7-11e8-a465-525400f775ce; "
        "_ga=GA1.2.1217499303.1528983310; "
        "_gid=GA1.2.1865314832.1528983311; "
        "TG-TRACK-CODE=index_search; "
        "index_location_city=%E5%8C%97%E4%BA%AC; "
        "_gat=1; "
        "Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1528983310,1528985325; "
        "LGSID=20180614220844-72ff3add-6fdc-11e8-a468-525400f775ce; "
        "PRE_UTM=; "
        "PRE_HOST=www.google.com.tw; "
        "PRE_SITE=https%3A%2F%2Fwww.google.com.tw%2F; "
        "PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; "
        "SEARCH_ID=2e6ad26430e14a729f89c145ed7e4965; "
        "Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1528985510; "
        "LGRID=20180614221149-e167c83b-6fdc-11e8-9642-5254005c3644"
    ),
    "DNT": "1",
    "Host": "www.lagou.com",
    "Origin": "https://www.lagou.com",
    "Referer": "https://www.lagou.com/jobs/list_iOS?city=%E5%8C%97%E4%BA%AC&cl=false&fromSearch=true&labelWords=&suginput=",
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Mobile Safari/537.36",
    # The "Anit" spelling of the next two header names is kept as-is.
    "X-Anit-Forge-Code": "0",
    "X-Anit-Forge-Token": "None",
    "X-Requested-With": "XMLHttpRequest",
}
# Endpoint that receives one form-encoded POST per result page.
ajax_url = 'https://www.lagou.com/jobs/positionAjax.json?px=default&city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false'
# Position records collected from every page fetched so far.
out = []
# NOTE(review): the original indentation was lost; the loop body is assumed to
# run through the running-total print below — confirm.
for i in range(31, 35):
    # Wait between consecutive requests.
    time.sleep(3)
    post_param = {"first": "false", "pn": str(i), "kd": "iOS"}
    r = requests.post(ajax_url, headers=headers, data=post_param)
    result = json.loads(r.text)
    print(str(i))
    print(result)
    # A reply lacking content.positionResult.result used to raise here, which
    # skipped the file write and lost the pages already collected. Stop
    # paging instead and fall through to saving what we have.
    content = result.get("content") or {}
    ar = (content.get("positionResult") or {}).get("result")
    if ar is None:
        print("page " + str(i) + ": no position list in the response, stopping early")
        break
    # Grow the list in place rather than re-copying it for every page.
    out.extend(ar)
    print(len(out))
outputFilePath = "/Users/dfpo/Desktop/postman22.json"
jsonStr = json.dumps(out, ensure_ascii=False, encoding='UTF-8')
# With ensure_ascii=False the dump may be a unicode object; encode it
# explicitly so the write does not depend on the process default encoding.
with open(outputFilePath, 'wb') as f:
    f.write(jsonStr.encode('utf-8'))
# coding=utf-8
import os
import matplotlib.pyplot as plt
import json
def getOldArray(path="/Users/dfpo/Desktop/postman.json"):
    """Load and return the JSON document stored at *path*.

    Args:
        path: Location of the JSON file to read. Defaults to the path this
            function has always read, so existing no-argument calls behave
            as before.

    Returns:
        The decoded JSON value, or None when the file does not exist (a
        notice is printed in that case).
    """
    if not os.path.exists(path):
        print(path + "文件不存在,不能执行合并操作")
        return None
    with open(path, 'r') as load_f:
        return json.load(load_f)
# Records to tally. getOldArray() yields None when the input file is missing,
# so fall back to an empty list instead of failing on iteration.
array = getOldArray() or []
names = []   # distinct labels, in first-seen order
counts = []  # counts[i] is the number of occurrences of names[i]
# Label -> position in names/counts; replaces a list.index() scan per label.
name_to_index = {}
# Per the previously commented-out variants, other record fields can be
# tallied the same way: single-valued workYear, education, financeStage,
# district, salary, companySize, firstType, industryField; and list-valued
# businessZones, companyLabelList, positionLables.
for record in array:
    companyLabels = record.get("positionAdvantage")
    # Check for a missing value before splitting; the original tested for
    # None only after calling .split(), where it could never help.
    if companyLabels is None:
        continue
    for name in companyLabels.split(','):
        # Drop surrounding whitespace and skip empty pieces so that "a, b"
        # and "a,b" count the same labels and no blank slice is produced.
        name = name.strip()
        if not name:
            continue
        if name in name_to_index:
            counts[name_to_index[name]] += 1
        else:
            name_to_index[name] = len(names)
            names.append(name)
            counts.append(1)
labels = names
sizes = counts
explode = [0] * len(names)
if counts:
    # Pull the most frequent label's slice out of the pie.
    maxIdx = counts.index(max(counts))
    explode[maxIdx] = 0.1
    fig1, ax1 = plt.subplots()
    ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
    # Equal aspect ratio so the pie is drawn as a circle.
    ax1.axis('equal')
    plt.show()
else:
    # max() on an empty list raises, and there is nothing to draw anyway.
    print("no labels found, nothing to plot")
# Result (效果): the pie chart displayed by plt.show() above.