# Baidu Tieba "JAVA" forum crawler, wrapped in a class (object-oriented style)
import os
from urllib.parse import quote

import requests
# https://tieba.baidu.com/f?ie=utf-8&kw=JAVA
class TiebaSpider(object):
    """Fetch the first list pages of one Baidu Tieba forum and save them as HTML.

    Attributes:
        tieba_name: Forum name; used as the ``kw`` query value and as the
            prefix of every saved file name.
        url: URL template for the forum with a single ``{}`` placeholder
            that receives the ``pn`` offset.
    """

    # Number of list pages downloaded by one run.
    PAGE_COUNT = 5
    # Step of the ``pn`` query parameter between consecutive pages.
    PAGE_STEP = 50
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    def __init__(self, tieba_name):
        """Remember the forum name and build its URL template.

        Args:
            tieba_name: Name of the forum to download.
        """
        self.tieba_name = tieba_name
        # Percent-encode the name so non-ASCII or reserved characters
        # (e.g. '+', '&') cannot corrupt the query string. quote() also
        # escapes '{' and '}', so the only placeholder left is the pn one.
        self.url = (
            'https://tieba.baidu.com/f?kw='
            + quote(tieba_name)
            + '&ie=utf-8&pn={}'
        )

    def get_url_list(self):
        """Build the list of page URLs.

        Returns:
            A list of ``PAGE_COUNT`` URLs whose ``pn`` offsets are
            0, PAGE_STEP, 2 * PAGE_STEP, ...
        """
        return [
            self.url.format(page_index * self.PAGE_STEP)
            for page_index in range(self.PAGE_COUNT)
        ]

    def parse_url(self, url):
        """Send a GET request and return the response body as text.

        Args:
            url: Address to fetch.

        Returns:
            The decoded response body.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                does not complete within ``REQUEST_TIMEOUT`` seconds.
        """
        # Without a timeout requests may block forever on a stalled server.
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return response.text

    def save_html(self, page_num, tb_html):
        """Write one page to ``jave_html/<tieba_name>-第<page_num>页.html``.

        Args:
            page_num: 1-based page number used in the file name.
            tb_html: HTML text to store (written as UTF-8).
        """
        file_path = 'jave_html/{}-第{}页.html'.format(self.tieba_name, page_num)
        # Create the output directory on first use instead of failing
        # with FileNotFoundError.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(tb_html)

    def run(self):
        """Download every page from ``get_url_list`` and save it to disk."""
        # enumerate gives the 1-based page number directly; looking the
        # URL up again with list.index() is a linear search per page.
        for page_num, page_url in enumerate(self.get_url_list(), start=1):
            print(page_url)
            page_html = self.parse_url(page_url)
            self.save_html(page_num, page_html)
if __name__ == "__main__":
    # Script entry point: crawl the "java" forum. The name passed here is
    # used as the prefix of the saved file names, so it is spelled "java"
    # (the previous "jave" was a typo for the JAVA forum named in the
    # header comments of this file).
    tb_spider = TiebaSpider('java')
    tb_spider.run()