import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import time
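
# A spider for Zhilian Zhaopin (sou.zhaopin.com) search results: it builds the
# search URL for each results page, fetches it with urllib, extracts the job
# listings with BeautifulSoup, and writes the collected items to work.txt.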
class ZhilianSpider(object):
    def __init__(self, jl, kw, start_page, end_page):
        # Save the search parameters as instance attributes
        self.jl = jl
        self.kw = kw
        self.start_page = start_page
        self.end_page = end_page
        self.url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?'
        # Request headers; adding a browser User-Agent here makes the
        # requests less likely to be rejected by the site
        self.headers = {
        }
        self.items = []
    def handle_request(self, page):
        # Build the query string for this page of results
        data = {
            'jl': self.jl,  # job location
            'kw': self.kw,  # search keyword
            'p': page       # page number
        }
        data = urllib.parse.urlencode(data)
        url = self.url + data
        # e.g. https://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E5%8C%97%E4%BA%AC&kw=python&p=1
        request = urllib.request.Request(url=url, headers=self.headers)
        return request
    def parse_content(self, content):
        # Build a soup from the response body
        soup = BeautifulSoup(content, 'lxml')
        # Each search result is rendered as a <table class="newlist">;
        # the first such table is the header row, so skip it
        table_list = soup.find_all('table', class_='newlist')[1:]
        for table in table_list:
            # job title
            zwmc = table.select('.zwmc > div > a')[0].text
            # company name
            gsmc = table.select('.gsmc > a')[0].text
            # monthly salary
            zwyx = table.select('.zwyx')[0].text
            # work location
            gzdd = table.select('.gzdd')[0].text
            # Collect the fields into a dict
            item = {
                '职位名称': zwmc,  # job title
                '公司名称': gsmc,  # company name
                '职位月薪': zwyx,  # monthly salary
                '工作地点': gzdd   # work location
            }
            self.items.append(item)
    def run(self):
        # For each page: build the request, fetch the response, and parse it
        for page in range(self.start_page, self.end_page + 1):
            request = self.handle_request(page)
            content = urllib.request.urlopen(request).read().decode('utf8')
            self.parse_content(content)
            time.sleep(1)  # brief pause between pages so we don't hammer the server
        # Write the collected items to a file
        string = str(self.items)
        with open('work.txt', 'w', encoding='utf8') as fp:
            fp.write(string)

def main():
    # Prompt for the job location
    jl = input("请输入工作地点:")
    # Prompt for the search keyword
    kw = input("请输入搜索关键字:")
    # Prompt for the start and end page numbers
    start_page = int(input("请输入起始页码:"))
    end_page = int(input("请输入结束页码:"))
    zhilian = ZhilianSpider(jl, kw, start_page, end_page)
    zhilian.run()


if __name__ == '__main__':
    main()
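
# Example session (values are illustrative):
#   请输入工作地点: 北京
#   请输入搜索关键字: python
#   请输入起始页码: 1
#   请输入结束页码: 3
# The scraped items end up in work.txt as the str() of a list of dicts.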