Python 简单抓取拉勾 iOS 职位，matplotlib 展示

# coding=utf-8
import requests
import time
import sys
# Python 2-only workaround: reload(sys) brings back sys.setdefaultencoding, which
# the interpreter removes from the module during startup; the default encoding
# is then switched to UTF-8 so implicit str/unicode conversions accept
# non-ASCII text. This does not run on Python 3.
# NOTE(review): code below that mixes byte strings and unicode text may rely
# on this -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')

import requests
import json

# Request headers copied from a browser session (see the User-Agent, Referer
# and Cookie values) and sent with every POST to the job-search endpoint.
#
# 'Content-Length' is intentionally absent: the form body length depends on the
# page number ("first=false&pn=31&kd=iOS" is 24 bytes, not the 23 that used to
# be hard-coded here), and requests fills in the correct value by itself.
#
# 'br' is not advertised in Accept-Encoding: requests can only decode Brotli
# responses when an optional brotli package is installed, and an undecoded
# body cannot be parsed as JSON.
#
# NOTE(review): the Cookie embeds 20180614-style timestamps, so it looks like a
# one-off capture that has presumably expired; session cookies should not be
# committed to source control -- replace with a fresh value loaded at runtime.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'user_trace_token=20180614213205-1ed85102-96b3-46b0-97c0-e73b711c8763; JSESSIONID=ABAAABAAAGFABEF5EE54015C5797042B80918E0FC34818B; X_HTTP_TOKEN=b95527a4550f8d93d47b619adada743c; LGUID=20180614213509-c1f72ef8-6fd7-11e8-a465-525400f775ce; _ga=GA1.2.1217499303.1528983310; _gid=GA1.2.1865314832.1528983311; TG-TRACK-CODE=index_search; index_location_city=%E5%8C%97%E4%BA%AC; _gat=1; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1528983310,1528985325; LGSID=20180614220844-72ff3add-6fdc-11e8-a468-525400f775ce; PRE_UTM=; PRE_HOST=www.google.com.tw; PRE_SITE=https%3A%2F%2Fwww.google.com.tw%2F; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; SEARCH_ID=2e6ad26430e14a729f89c145ed7e4965; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1528985510; LGRID=20180614221149-e167c83b-6fdc-11e8-9642-5254005c3644',
    'DNT': '1',
    'Host': 'www.lagou.com',
    'Origin': 'https://www.lagou.com',
    'Referer': 'https://www.lagou.com/jobs/list_iOS?city=%E5%8C%97%E4%BA%AC&cl=false&fromSearch=true&labelWords=&suginput=',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Mobile Safari/537.36',
    # The "Anit" spelling is kept exactly as captured from the site.
    'X-Anit-Forge-Code': '0',
    'X-Anit-Forge-Token': 'None',
    'X-Requested-With': 'XMLHttpRequest',
}

# Search endpoint; the query string carries the URL-encoded city filter.
ajax_url = 'https://www.lagou.com/jobs/positionAjax.json?px=default&city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false'

# Position records accumulated across every fetched result page.
out = []
for i in range(31, 35):
    # Pause before each request (presumably to avoid hammering the server).
    time.sleep(3)
    post_param = {"first": "false", "pn": str(i), "kd": "iOS"}
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.post(ajax_url, headers=headers, data=post_param, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page as JSON.
    r.raise_for_status()
    result = r.json()
    print(str(i))
    print(result)
    try:
        ar = result["content"]["positionResult"]["result"]
    except (KeyError, TypeError):
        # The payload does not have the expected nesting. Stop paging but
        # still fall through to the dump so already-fetched pages are saved.
        print("unexpected response for page " + str(i) + ", stopping")
        break
    # extend() appends in place rather than rebuilding the list each page;
    # a null result list is treated as empty.
    out.extend(ar or [])
    print(len(out))

outputFilePath = "/Users/dfpo/Desktop/postman22.json"
jsonStr = json.dumps(out, ensure_ascii=False)
# Encode explicitly and write bytes so the file is UTF-8 on both Python 2 and
# Python 3, independent of the interpreter's default encoding.
jsonBytes = jsonStr if isinstance(jsonStr, bytes) else jsonStr.encode('utf-8')
with open(outputFilePath, 'wb') as f:
    f.write(jsonBytes)

# coding=utf-8
import os
import matplotlib.pyplot as plt
import json
def getOldArray(filePath="/Users/dfpo/Desktop/postman.json"):
    """Load previously saved JSON data from ``filePath``.

    Args:
        filePath: Path of the UTF-8 encoded JSON file to read. Defaults to
            the location that used to be hard-coded in this function.

    Returns:
        The decoded JSON value, or ``None`` (after printing a notice) when
        the file does not exist.

    Raises:
        ValueError: If the file content is not valid UTF-8 or not valid JSON.
    """
    if not os.path.exists(filePath):
        print(filePath + "文件不存在，不能执行合并操作")
        return None
    # Read bytes and decode explicitly so parsing does not depend on the
    # platform's default text encoding.
    with open(filePath, 'rb') as load_f:
        return json.loads(load_f.read().decode('utf-8'))

def _countLabels(records, field):
    """Tally how often each label occurs in ``field`` across ``records``.

    A list/tuple value is iterated as-is; any other non-None value is treated
    as a comma-separated string. Labels are stripped of surrounding
    whitespace, and None or empty labels are ignored. Records that lack the
    field (or hold None for it) are skipped.

    Returns:
        ``(names, counts)``: two parallel lists in first-seen label order.
    """
    labelNames = []
    labelCounts = []
    slotOf = {}  # label -> index into the two lists, for O(1) lookup
    for record in records:
        raw = record.get(field)
        # Check for None before splitting, not after.
        if raw is None:
            continue
        parts = raw if isinstance(raw, (list, tuple)) else raw.split(',')
        for part in parts:
            if part is None:
                continue
            label = part.strip()
            if not label:
                continue
            if label in slotOf:
                labelCounts[slotOf[label]] += 1
            else:
                slotOf[label] = len(labelNames)
                labelNames.append(label)
                labelCounts.append(1)
    return labelNames, labelCounts


# Field to chart. Other candidates noted by the original author:
#   single-valued: industryField, workYear, education, financeStage, district,
#                  salary, companySize, firstType
#   list-valued:   positionLables, businessZones, companyLabelList
# getOldArray() returns None when the data file is missing; treat that as
# "no records" instead of crashing in the loop.
array = getOldArray() or []
names, counts = _countLabels(array, "positionAdvantage")
labels = names
sizes = counts
explode = [0] * len(names)
if counts:
    # Pull the most frequent label's wedge slightly out of the pie.
    maxIdx = counts.index(max(counts))
    explode[maxIdx] = 0.1
    fig1, ax1 = plt.subplots()
    ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
    # Equal aspect ratio so the pie is drawn as a circle.
    ax1.axis('equal')

    plt.show()
else:
    print("no labels found, nothing to plot")

效果

image.png

最后编辑于：2018.06.15 09:17:36