本脚本基于 Python 2.7：按照程序提示输入账号和密码之后，即可获取知乎热点新闻的标题和链接。如果想获取知乎的其他信息，可以自行修改代码。
直接上代码啦
<code>
import re
import requests
import cookielib
from PIL import Image
import time
import json
import webbrowser
from attr import attrib
from lxml import etree
import urllib2
import urlparse
# Browser-like User-Agent sent with every request made by this script.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'}
# File the cookie jar is bound to.
filename = 'cookie'
session = requests.Session()
# A plain cookielib.CookieJar has no load()/save(); LWPCookieJar is the
# file-backed variant, so the load() below can actually restore cookies.
session.cookies = cookielib.LWPCookieJar(filename=filename)
try:
    session.cookies.load(filename=filename, ignore_discard=True)
except IOError:
    # Missing or unreadable cookie file (cookielib.LoadError is an IOError
    # subclass): carry on without previously saved cookies.
    print('cookie fail')
# Example of the markup the token is read from:
# <input type="hidden" name="_xsrf" value="f1f90f1cfe8ec5c732ef0d8833ccabe8"/>
def get_xsrf():
    """Fetch the Zhihu home page and return the value of its ``_xsrf`` input.

    Returns:
        The ``value`` attribute of the first hidden ``_xsrf`` input found in
        the page HTML.

    Raises:
        IndexError: if the page contains no ``_xsrf`` hidden input. The
            exception type matches what the previous ``findall(...)[0]``
            raised, but now carries a descriptive message.
    """
    response = session.get('https://www.zhihu.com', headers=headers)
    match = re.search(
        r'<input type="hidden" name="_xsrf" value="(.*?)"', response.text)
    if match is None:
        raise IndexError('_xsrf token not found in the Zhihu home page')
    return match.group(1)
def get_captcha():
    """Download the login captcha, display it, and return what the user types.

    The image is saved to ``cptcha.gif`` in the working directory, opened
    with PIL's viewer, and the user is prompted for the code on stdin. The
    entered text is echoed and returned.
    """
    # Millisecond timestamp used as the ``r`` query parameter.
    millis = str(int(time.time() * 1000))
    reply = session.get(
        'https://www.zhihu.com/captcha.gif?r=' + millis + "&type=login",
        headers=headers)
    image_path = 'cptcha.gif'
    with open(image_path, 'wb') as out:
        out.write(reply.content)
    Image.open(image_path).show()
    typed = raw_input('Verification code:')
    print(typed)
    return typed
def login(username, password):
    """Log in to Zhihu with a phone number or e-mail address.

    An 11-digit ``username`` is posted to the phone login endpoint; anything
    else is posted to the e-mail endpoint. The form carries the ``_xsrf``
    token from ``get_xsrf()`` and a captcha obtained interactively from
    ``get_captcha()``. The ``msg`` field of the JSON reply is printed.

    Args:
        username: phone number (exactly 11 digits) or e-mail address.
        password: account password.
    """
    # Decide on the *parameter*, not on a module-level ``account`` global.
    if re.match(r'\d{11}$', username):
        print('phone logining')
        # HTTPS, like the e-mail endpoint: the form contains the password.
        url = 'https://www.zhihu.com/login/phone_num'
        account_field = 'phone_num'
    else:
        print('email longing')
        url = 'https://www.zhihu.com/login/email'
        account_field = 'email'
    data = {
        '_xsrf': get_xsrf(),
        'password': password,
        'remember_me': 'true',
        account_field: username,
    }
    data['captcha'] = get_captcha()
    result = session.post(url, data=data, headers=headers)
    print((json.loads(result.text))['msg'] + ' codeLogin')
    # Cookie persistence was left disabled in the original script:
    # session.cookies.save(ignore_discard=True, ignore_expires=True)
def nextMore(offset, start):
    """Request the next batch of the top-story feed and print its ``msg``.

    Args:
        offset: value sent as ``offset`` inside the ``params`` form field.
        start: value sent as ``start`` inside the ``params`` form field.
    """
    url = 'https://www.zhihu.com/node/TopStory2FeedList'
    data = {
        # A nested dict used as a form value is flattened by requests to its
        # keys only ("params=offset&params=start"), dropping the values, so
        # the parameters are serialised to a JSON string instead.
        'params': json.dumps({'offset': offset, 'start': start}),
        'method': 'next',
    }
    result = session.post(url, data=data, headers=headers)
    print((json.loads(result.text))['msg'] + ' ')
def download(url, headers, proxy, num_retries, data=None):
    """Fetch ``url`` with urllib2 and return the response body.

    Args:
        url: address to fetch.
        headers: dict of request headers; a falsy value means no headers.
        proxy: proxy address used for the URL's scheme, or a falsy value for
            a direct connection.
        num_retries: how many more times to retry after a 5XX HTTP error.
        data: optional request body passed to ``urllib2.Request``.

    Returns:
        The body read from the response, or ``''`` if the download failed
        and was not (or could no longer be) retried.
    """
    headers = headers or {}
    print('Downloading: %s' % (url,))
    request = urllib2.Request(url, data, headers)
    opener = urllib2.build_opener()
    if proxy:
        proxy_params = {urlparse.urlparse(url).scheme: proxy}
        opener.add_handler(urllib2.ProxyHandler(proxy_params))
    try:
        response = opener.open(request)
        try:
            html = response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()
    except urllib2.URLError as e:
        print('Download error: %s' % (e.reason,))
        html = ''
        # Only HTTP errors carry a status code; retry on 5XX responses.
        code = getattr(e, 'code', None)
        if code is not None and num_retries > 0 and 500 <= code < 600:
            return download(url, headers, proxy, num_retries - 1, data)
    return html
# Script entry point: log in interactively, print the titles and links found
# on the recommendations page, then open that page in the browser.
# The guard read ``name == 'main'`` (underscores lost), which is a NameError.
if __name__ == '__main__':
    account = raw_input('account:')
    secret = raw_input('password:')
    login(account, secret)
    get_url = 'https://www.zhihu.com/explore/recommendations'
    resp = session.get(get_url, headers=headers, allow_redirects=False)
    page = etree.HTML(resp.text)
    # Look at the first five entries of the recommendation list.
    for i in range(1, 6):
        query = "//div[@id='zh-recommend']/div[2]/div[1]/div[" + str(i) + "]/h2/a"
        for href in page.xpath(query):
            # The href attribute is appended to the site root to form the link.
            print(href.text + '\n' + 'https://www.zhihu.com' + str(href.attrib['href']))
    webbrowser.open(get_url, new=0, autoraise=True)
</code>