import requests
import re
class chouti:
    """Minimal scraper for the "hot / recent" listing on dig.chouti.com.

    Attributes:
        agent: User-Agent string installed on the session.
        host: Host name of the site being scraped.
        headers: Header dict built from ``host`` and ``agent``.
        session: ``requests`` session used for every HTTP call.
        pageIndex: Listing page callers start from (1).
    """

    # Captures three fields per news entry: the link href, the share-pic
    # attribute and the text of the first <b> following the "hand-icon" span
    # (presumably the vote count -- confirm against the page markup).
    # Attribute values are captured verbatim, including surrounding quotes.
    # Compiled once for the class instead of on every getPage() call.
    NEWS_PATTERN = re.compile('<div class="news.*?>.*?<a href=(.*?) class="show.*?>.*?<div class="part2" share-pic=(.*?) share-title=.*?>.*?<span class="hand-icon .*?</span>.*?<b>(.*?)</b>',re.S)

    def __init__(self):
        """Create the HTTP session and set the starting page to 1."""
        self.agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36"
        self.host = "dig.chouti.com"
        self.headers = {
            'Host': self.host,
            'User-Agent': self.agent,
        }
        self.session = requests.session()
        # The headers dict alone is never sent; install the browser
        # User-Agent on the session so requests do not go out with the
        # default python-requests one. Host is deliberately left for requests
        # to derive from the URL so redirects to another host keep working.
        self.session.headers['User-Agent'] = self.agent
        self.pageIndex = 1

    def getPage(self, pageIndex):
        """Fetch one listing page, print its entries and return them.

        Args:
            pageIndex: Page number appended to the listing URL.

        Returns:
            A list of ``(href, share_pic, count)`` string tuples, one per
            entry matched on the page; empty when nothing matches.
        """
        url = "http://dig.chouti.com/all/hot/recent/" + str(pageIndex)
        response = self.session.get(url)
        items = self.NEWS_PATTERN.findall(response.text)
        for item in items:
            # The pattern has exactly three groups, so valid indices are 0-2;
            # indexing item[3] would raise IndexError on the first match.
            print(item[0] + '\n' + item[1] + '\n' + item[2])
        return items
# Build the scraper, then fetch and print its starting listing page.
ct = chouti()
ct.getPage(ct.pageIndex)
import requests
import re
class chouti:
    """Scraper that collects dig.chouti.com listing entries and saves them.

    Attributes:
        agent: User-Agent string installed on the session.
        host: Host name of the site being scraped.
        headers: Header dict built from ``host`` and ``agent``.
        session: ``requests`` session used for every HTTP call.
        pageIndex: Listing page callers start from (1).
        news: Entries accumulated by ``getPage`` across all calls.
    """

    # Captures three fields per news entry: the link href, the share-pic
    # attribute and the text of the first <b> following the "hand-icon" span
    # (presumably the vote count -- confirm against the page markup).
    # Attribute values are captured verbatim, including surrounding quotes.
    # Compiled once for the class instead of on every getPage() call.
    NEWS_PATTERN = re.compile('<div class="news.*?>.*?<a href=(.*?) class="show.*?>.*?<div class="part2" share-pic=(.*?) share-title=.*?>.*?<span class="hand-icon .*?</span>.*?<b>(.*?)</b>',re.S)

    def __init__(self):
        """Create the HTTP session and start with an empty result list."""
        self.agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36"
        self.host = "dig.chouti.com"
        self.headers = {
            'Host': self.host,
            'User-Agent': self.agent,
        }
        self.session = requests.session()
        # The headers dict alone is never sent; install the browser
        # User-Agent on the session so requests do not go out with the
        # default python-requests one. Host is deliberately left for requests
        # to derive from the URL so redirects to another host keep working.
        self.session.headers['User-Agent'] = self.agent
        self.pageIndex = 1
        self.news = []

    def getPage(self, pageIndex):
        """Fetch one listing page, print its entries and record them.

        Args:
            pageIndex: Page number appended to the listing URL.

        Every matched ``(href, share_pic, count)`` tuple is printed and
        appended to ``self.news``.
        """
        url = "http://dig.chouti.com/all/hot/recent/" + str(pageIndex)
        response = self.session.get(url)
        for item in self.NEWS_PATTERN.findall(response.text):
            print(item[0] + '\n' + item[1] + '\n' + item[2])
            self.news.append(item)

    def getRefreshen(self, pages=5):
        """Scrape listing pages 1 through ``pages``, then write the results.

        Args:
            pages: Number of listing pages to fetch, starting at page 1.
        """
        for page in range(1, pages + 1):
            self.getPage(page)
        self.downLoad()

    def downLoad(self):
        """Append every collected entry to ``chouti.txt``.

        Each entry is written as three lines followed by a blank line.
        """
        # NOTE(review): the file is opened in append mode and self.news is
        # never cleared, so a second call re-appends the earlier entries.
        # `with` closes the file even when a write fails part-way through.
        with open('chouti.txt', 'a') as f:
            f.writelines(
                item[0] + '\n' + item[1] + '\n' + item[2] + '\n\n'
                for item in self.news
            )

    def start(self):
        """Entry point: run the default scrape-and-save cycle."""
        self.getRefreshen()
# Instantiate the scraper and run it: start() scrapes listing pages 1-5 and
# appends the collected entries to chouti.txt.
ct = chouti()
ct.start()
import re
import requests
# Download every .jpg/.png image whose URL appears in chouti.txt and save the
# files as 1.<ext>, 2.<ext>, ... in the current directory.
session = requests.session()
index = 1

# `with` closes the file on every path and never touches an unbound name when
# open() itself fails (a bare try/finally around open() would).
with open('chouti.txt', 'r') as f:
    txt = f.read()

# A URL may not contain whitespace or quotes, so a match can no longer run
# from an article link across the line break into the following image URL.
# The single capturing group holds the extension; the full URL is group(0).
pattern = re.compile(r'https?://[^\s"\']+?\.(jpg|png)')

for match in pattern.finditer(txt):
    url = match.group(0)
    response = session.get(url)
    if not response.ok:
        # Do not store an HTTP error page under an image file name.
        print('skipping ' + url + ': HTTP ' + str(response.status_code))
        continue
    # Keep the real extension rather than forcing ".jpg" onto PNG files.
    with open(str(index) + '.' + match.group(1), 'wb') as f1:
        f1.write(response.content)
    # Advance the counter so each image gets its own file.
    index += 1