看看我们要达到的目的
1. 下载 PyCharm
PyCharm
#coding=utf-8
import sys
import re
import urllib
def gethtml(url):
    """Open ``url`` and return the full response body.

    Args:
        url: Address passed unchanged to ``urllib.urlopen``.

    Returns:
        The value of ``read()`` on the opened response.

    Errors raised by ``urllib.urlopen`` or ``read`` propagate to the caller.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the connection, even when read() fails; the caller
        # invokes this function thousands of times in a row.
        page.close()
def getmessage(html):
    """Return the text found inside the page's content ``<div>``.

    A match is the text between ``<div class="content">`` and a following
    ``</div><script type="text/javascript">`` marker.

    Args:
        html: Page markup to search.

    Returns:
        A list with one captured string per match; empty when nothing matches.
    """
    # `.` does not match newlines and `.*` is greedy, so each match stays on
    # one line and runs up to the last closing marker on that line.
    p = re.compile(r'<div class="content">(.*)</div><script type="text/javascript">')
    return p.findall(html)  # captured group of every match
# Fetch subject pages 1 through 6999 and write each page's extracted text as
# one line of data.txt. The `with` block closes the file even if a request or
# write raises partway through the crawl.
with open('data.txt', 'w+') as fp:
    for i in range(1, 7000):
        web = gethtml('http://ishuo.cn/subject/' + str(i))
        message = getmessage(web)
        message2 = ''.join(message)  # join all matches into a single string
        print(message2)
        fp.write(message2 + '\n')