目的
市场上有很多互联网金融公司开展了P2P业务,用户购买某P2P产品并持有一定时间后可以转让,用户为了能顺利出让,一般会出让部分收益,因此会出现很高收益的理财产品,本文的目的便是利用爬虫抢到这些高收益的理财产品。
目标
这里选取小牛理财wap站点作为爬取网站[https://www.xiaoniu88.com/weixin]
通过分析该网站的请求,发现理财产品列表是有单独的接口提供json数据,因此不用费劲的爬取页面做元素解析。
爬虫的大致方案如下:
- 使用python的urllib2和BeautifulSoup工具
- 使用sqlite3存储历史数据
- 自定义抢购理财产品
- 使用sqlite3存储爬虫数据
CREATE TABLE product(id int primary key not null,showDeadline long,showAnnualRate float,borrowAmount float,borrowTitle varchar(100),fullTenderTime long,publishTime long);
调用mac弹窗提醒
osascript -e 'display notification "有推荐的理财" with title "理财来了"'
##使用yaml读取配置文件提醒购买,例如遇到年化收益率大于15%,金额大于20000元,剩余时间不超过100天则提醒
#cfg.yaml
#showAnnualRate: 15
#borrowAmount: 20000
#showdeadline: 100
# Read the alert thresholds from the YAML config file (cfg.yaml, shown above).
import yaml
# NOTE(review): `f` is assumed to be an already-opened handle to cfg.yaml — not shown in this snippet
cfg=yaml.load(f)
annualRate_target = cfg['showAnnualRate']  # minimum annualized rate (%) that triggers an alert
amount_target = cfg['borrowAmount']  # minimum product amount
showdeadline = cfg['showdeadline']  # maximum remaining days
#遇到符合配置文件条件的数据时,调用浏览器打开该理财产品页面,并调用mac语音命令提醒
# Open the matching product's page in the default browser and speak an alert.
import webbrowser
# NOTE(review): `id` is assumed to hold the product id — not defined in this snippet
url_target = 'https://www.xiaoniu88.com/weixin/productDetails/'+str(id)
webbrowser.open(url_target)
print webbrowser.get()  # show which browser controller was used
os.system("say '新理财' ")  # macOS text-to-speech voice alert
##爬虫采用urllib2和BeautifulSoup,需要设置完整cookie,否则无法爬取
# Fetch the product-list endpoint with the full cookie set (required, otherwise
# the site rejects the request) and parse the JSON payload out of the response.
req = urllib2.Request(url,None,req_header)  # req_header carries the Cookie header
response = urllib2.urlopen(req)
data = response.read()
soup=BeautifulSoup(data,'lxml')
json1 = json.loads(soup.p.string)  # the JSON body comes back wrapped in a <p> tag
运行爬虫
运行爬虫后,当遇到符合条件的理财产品时,便会提醒,并在浏览器打开该理财产品。
历史数据分析
通过对爬取的历史数据进行分析,发现大概每天会有几十到一百多笔理财转让。
日期 | 转让笔数 |
---|---|
2017-05-01 | 36 |
2017-05-02 | 93 |
2017-05-03 | 25 |
2017-05-04 | 44 |
2017-05-05 | 139 |
2017-05-06 | 59 |
2017-05-07 | 55 |
2017-05-08 | 119 |
2017-05-09 | 30 |
2017-05-10 | 85 |
2017-05-11 | 81 |
2017-05-12 | 119 |
2017-05-13 | 65 |
2017-05-14 | 43 |
2017-05-15 | 139 |
2017-05-16 | 158 |
2017-05-17 | 70 |
2017-05-18 | 88 |
2017-05-19 | 89 |
2017-05-20 | 54 |
2017-05-21 | 35 |
2017-05-22 | 187 |
从前十名收益的产品来看,最高收益超过了90%,基本都是秒抢,因此一般还是很难抢到,下一步需要研究下下单的接口,如果能自动下单概率就大很多。
名称 | id | 持有天数 | 年化收益 | 秒抢时间
----|----|----|----|----
安心牛Z20160808-203|470506|44|92.78|14
安心牛Z20170112-135|506087|50|79.2|12
安心牛Z20170417-117|523123|118|58.32|7
安心牛Z20170130-040|508817|48|56.07|70
安心牛Z20160811-170|471457|42|50.88|86
安心牛Z20170319-092|518157|49|45.48|36
安心牛Z20170511-060|527610|212|44.15|20
安心牛Z20161219-183|501525|99|40.0|67
安心牛Z20160731-079|468159|111|37.3|9
安心牛Z20170417-118|523124|123|34.86|9
完整代码
# -*- coding: utf-8 -*-
import urllib2
import sys
import os
import gzip
import StringIO
import json
from bs4 import BeautifulSoup
import webbrowser
import pygame
import yaml
import time
import sqlite3
# Full cookie string captured from a logged-in browser session; the site refuses
# the list endpoint without it. NOTE(review): these session values expire and
# must be refreshed manually from the browser.
cookie='''referer="https://www.baidu.com/_2017-05-17|https://www.baidu.com/_2017-05-17"; JSESSIONID=2260FFDD0CA3CE64B611C1379EBB09D7.t-9003; _ga=GA1.2.2061273543.1495018863; Hm_lvt_7226b8c48cd07619c7a9ebd471d9d589=1495018863,1495020571; Hm_lpvt_7226b8c48cd07619c7a9ebd471d9d589=1495020778; _jzqx=1.1495459084.1495459084.1.jzqsr=xiaoniu88%2Ecom|jzqct=/weixin/register.-; _jzqckmp=1; _jzqa=1.751493343713972600.1495459084.1495459084.1495459084.1; _jzqc=1; _qzja=1.2047379673.1495459084080.1495459084080.1495459084082.1495459084080.1495459084082..1.0.1.1; _qzjc=1; SESSIONID=38c1567f-4fdb-44f3-a136-7e890cfc7d8f; Hm_lvt_a8083b9357dade16389e88eed86fe3df=1495459013,1495459102,1495459585,1495459595; Hm_lpvt_a8083b9357dade16389e88eed86fe3df=1495468356; sr=334.127.201.43.11.3.123.58.160.131.0.33.20.15.07'''
# Request headers mimicking Chrome 58 on macOS, carrying the cookie above so
# the server treats the crawler like a normal logged-in browser.
req_header = {
'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6',
'Accept-Encoding':'gzip, deflate, sdch, br',
'Connection':'keep-alive',
'Cookie':cookie,
'Referer':'https://www.xiaoniu88.com/weixin/productlist/v2/transferable',
'Host':'www.xiaoniu88.com',
'Upgrade-Insecure-Requests':'1',
'Cache-Control':'max-age=0'}
reload(sys)
sys.setdefaultencoding('utf8')
print 'ok'
conn=sqlite3.connect("/Users/jerryliu/soft/sqlite-tools-osx-x86-3180000/test.db")
url_model = "https://www.xiaoniu88.com/weixin/productlist/v2/transferable/{0}/100?1495467410387"
i=0
while 2>1:
url = url_model.format(i)
print 'url'+url
i=i+1
req = urllib2.Request(url,None,req_header)
response = urllib2.urlopen(req)
data = response.read()
soup=BeautifulSoup(data,'lxml')
json1 = json.loads(soup.p.string)
result = json1['data']
f=open("cfg.yaml")
cfg=yaml.load(f)
annualRate_target = cfg['showAnnualRate']
amount_target = cfg['borrowAmount']
print 'len:'+str(len(result))
for d in result:
id = d['productDTO']['id'] #id
showdeadline = d['productDTO']['showDeadline'] ##截止时间
showAnnualRate = d['productDTO']['showAnnualRate'] ##年化收益率
borrowAmount = d['productDTO']['borrowAmount'] ##金额
borrowTitle = d['productDTO']['borrowTitle'] ##标记
fullTenderTime = d['productDTO']['fullTenderTime'] ##满额时间
publishTime = d['productDTO']['publishTime']
borrowTitle = d['productDTO']['borrowTitle']
expireDt = d['productDTO']['expireDt']
if fullTenderTime==None:
fullTenderTime=0
print str(id)+":"+str(showAnnualRate)+":"+str(borrowAmount)+":"+borrowTitle
sql='''insert into product (id,showDeadline,showAnnualRate,borrowAmount,borrowTitle,fullTenderTime,publishTime) values ({0},{1},{2},{3},'{4}',{5},{6}) '''.format(id,showdeadline,showAnnualRate,borrowAmount,borrowTitle,fullTenderTime,publishTime)
try:
conn.execute(sql)
conn.commit()
except sqlite3.IntegrityError:
print 'dupli'
if showAnnualRate>=float(annualRate_target) and borrowAmount>=float(amount_target) and fullTenderTime==0:
cmd = ''' osascript -e 'display notification "有推荐的理财" with title "理财来了"' '''
os.system(cmd)
url_target = 'https://www.xiaoniu88.com/weixin/productDetails/'+str(id)
webbrowser.open(url_target)
print webbrowser.get()
os.system("say '新理财' ")
break
time.sleep(10)