来写我们的爬虫 >_< (我可能是条咸鱼了吧!) 爬虫十分简单,爬取的网站也没有什么反爬,整体还是很方便的
# -*- coding:utf-8 -*-
import requests
import random
from bs4 import BeautifulSoup
import pymssql
import os
# Fetch the page source
def get_data():
    """Download the Hupu NBA standings page and return its HTML.

    Returns:
        str: the response body decoded as text.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    headers = Ua_headers()
    # NOTE(review): verify=False disables TLS certificate checking. It is kept
    # to preserve the existing behaviour; drop it if the site's certificate
    # validates in your environment.
    response = requests.get(
        'https://nba.hupu.com/standings',
        headers=headers,
        verify=False,
        timeout=10,  # without a timeout a stalled connection hangs forever
    )
    # Fail loudly on an error page instead of handing it to the parser.
    response.raise_for_status()
    return response.text
# Build the request headers (random User-Agent)
def Ua_headers():
    """Build request headers with a randomly chosen browser User-Agent.

    Returns:
        dict: the fixed ``Accept``, ``X-Requested-With`` and ``Content-Type``
        entries plus a ``User-Agent`` picked at random from a small pool.
    """
    user_agent_list = [
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/66.0.3359.139 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
        'Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0',
    ]
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # The HTTP header is spelled "User-Agent". The previous key
        # 'user_agent' sent an unrelated header, so the random UA was ignored.
        'User-Agent': random.choice(user_agent_list),
    }
    return headers
# Parse the standings and write them to the database
def Analyze_data(data):
    """Parse the standings HTML and reload the ``index_nbadata`` table.

    The table is truncated and then refilled with one row per team: ranking,
    team name, wins, losses, win rate, logo path and area. For each team row
    that links to a team page, the logo is downloaded through ``get_img``.

    Args:
        data: HTML text of the standings page.

    Raises:
        ValueError: if the page contains no ``<tbody>`` element.
    """
    soup = BeautifulSoup(data, 'html.parser')
    body = soup.find('tbody')
    if body is None:
        raise ValueError('standings page contains no <tbody> element')

    # NOTE(review): server name and credentials are hard-coded here; consider
    # moving them to configuration or environment variables.
    conn = pymssql.connect("DESKTOP-SCIMUGR", "sa", "zxc1230.", "NBA")
    try:
        cursor = conn.cursor()
        cursor.execute('truncate table index_nbadata')

        area = ''  # area name taken from the most recent single-cell row
        for tr in body.find_all('tr'):
            cells = tr.find_all("td")
            if len(cells) == 1:
                # A single-cell row is a section heading carrying the area.
                area = cells[0].text
                continue
            if len(cells) < 5:
                # Not a complete data row; nothing to store.
                continue

            team = cells[1].text
            try:
                ranking = int(cells[0].text)
                wins = int(cells[2].text)
                losses = int(cells[3].text)
            except ValueError:
                # Rows whose cells are not numeric (e.g. column titles) are
                # skipped, as before.
                continue
            win_rate = cells[4].text

            # Reset per row so a failed download never reuses the previous
            # team's logo path (or hits an unbound variable on the first row).
            logo_path = ''
            link = cells[1].find("a")
            if link is not None and link.get('href'):
                try:
                    get_img(link['href'], team)
                    logo_path = "/static/images/" + team + '.png'
                except TypeError:
                    # Best-effort, as in the original: a TypeError raised
                    # while fetching the logo leaves the path empty.
                    pass

            # Parameterized query: the driver quotes the values, so names
            # containing quotes cannot break or inject into the statement.
            cursor.execute(
                "insert into index_nbadata values (%d, %s, %d, %d, %s, %s, %s)",
                (ranking, team, wins, losses, win_rate, logo_path, area),
            )

        conn.commit()  # nothing is persisted without an explicit commit
    finally:
        conn.close()
# Download a team logo. The script is run from the Django project folder, so
# the target directory is resolved relative to the current working directory.
def get_img(url, name):
    """Download the logo shown on a team page and save it as ``<name>.png``.

    The file is written to ``NBA_web/index/static/images`` under the current
    working directory; the directory is created if it does not exist.

    Args:
        url: address of the team page that contains the logo.
        name: file name (without extension) to save the logo under.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status for either request.
    """
    folder = os.path.join(os.getcwd(), 'NBA_web', 'index', 'static', 'images')
    os.makedirs(folder, exist_ok=True)

    headers = Ua_headers()
    # NOTE(review): verify=False disables TLS certificate checking; kept to
    # preserve the existing behaviour.
    response = requests.get(url, headers=headers, verify=False, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    img = soup.find('div', {'class': 'img'})
    img_url = img.find("img")['src']

    pon = requests.get(img_url, timeout=10)
    # Do not save an HTTP error page under a .png name.
    pon.raise_for_status()

    # 'wb' replaces any earlier download. The previous 'ab+' mode appended the
    # new bytes to the old file on every run, corrupting the image.
    with open(os.path.join(folder, name + '.png'), 'wb') as im:
        im.write(pon.content)
    print('保存图片成功')
if __name__ == "__main__":
    # Fetch the standings page once, then parse it and refresh the database.
    page_html = get_data()
    Analyze_data(page_html)
爬虫写好之后运行一次就可以获取到最新的数据 # (运行一次获取一次 哈哈)T_T我就是这个水平怎么滴!
这个时候数据库已经有数据啦 我们已经成功九成啦! 666!
要想让这些数据在网站上可视化是十分简单的 O_O!!
到 echarts 的官网查找你所要用的图例,然后从 https://www.echartsjs.com/dist/echarts-gl.js
把这个 echarts-gl.js 下载下来,导入您的 django 项目中以便使用。导入这个 js 之后,按照下图的写法进行可视化操作,这样就完成啦
这个是源码,模仿源码来写就可以啦;上面的那些 js 不用写,直接用刚刚下载的那个 echarts-gl.js 就可以啦
我的文件夹结构图:
我用了通用模块 views.py 是这样写的
from django.views import generic # 通用模块
class EastViews(generic.ListView):
    """List view that renders ``east.html`` with the rows whose area is '东部'."""

    # Template rendered for this view.
    template_name = 'east.html'
    # Name under which the queryset is exposed to the template.
    context_object_name = 'GGGG'

    def get_queryset(self):
        """Return the ``NBAData`` rows filtered to area '东部'."""
        eastern_rows = NBAData.objects.filter(area='东部')
        return eastern_rows.all()
class WestViews(generic.ListView):
    """List view that renders ``west.html`` with the rows whose area is '西部'."""

    # Template rendered for this view.
    template_name = 'west.html'
    # Name under which the queryset is exposed to the template.
    context_object_name = 'GGGG'

    def get_queryset(self):
        """Return the ``NBAData`` rows filtered to area '西部'."""
        western_rows = NBAData.objects.filter(area='西部')
        return western_rows.all()
前端的书写 哈哈有趣的前端
# east.html
{% extends 'model.html' %}
{% load static %}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
{% block title %}
{# Overrides the "title" block with this page's <title> element. #}
<title>东部球队展示</title>
{% endblock %}
{% block modeljs %}
    {# Scripts loaded for this page: the ECharts library and jQuery. #}
    <script src="{% static 'js/echarts.js' %}" type="text/javascript"></script>
    <script src="{% static 'js/jquery-3.1.0.js' %}"></script>
{% endblock %}
{% block modelcss %}
    <style type="text/css">
        body {
            {#text-align:center;#}
        }

        /* Chart canvas. */
        #container {
            margin-left: 20%;
            width: 1200px;
            height: 800px;
        }

        /* Wrapper around the standings table. */
        #title_data {
            margin-left: 20%;
        }

        table {
            font-size: 25px;
            text-align: center;
        }
    </style>
{% endblock %}
</head>
<body>
{% block modeldiv %}
    {# Mount point for the ECharts chart; the script looks it up by id "container". #}
    {# "align: center" was dropped from the inline style: it is not a CSS property and had no effect. #}
    <div id="container" style="height: 800px;"></div>
{% endblock %}
{% block modelscript %}
    {# The former "if NBAdata" wrapper around this block was removed: the view exposes the data #}
    {# as GGGG, and a condition placed around a block tag in an extending template does not gate it. #}
    <script type="text/javascript">
        // Chart series, filled from the queryset the view exposes as GGGG.
        var ballgeme_list = [];
        var winlist = [];
        var transportlist = [];
        var winratelist = [];
        // escapejs makes each value safe inside a JavaScript string literal;
        // plain HTML auto-escaping would put entities such as &#x27; in the labels.
        {% for data in GGGG %}
        ballgeme_list.push('{{ data.ballgame|escapejs }}');
        winlist.push('{{ data.win|escapejs }}');
        transportlist.push('{{ data.transport|escapejs }}');
        winratelist.push('{{ data.winrate|escapejs }}'.replace('%', ''));
        {% endfor %}

        // Build one bar series with max/min markers; all three series share this shape.
        function barSeries(name, values) {
            return {
                name: name,
                type: 'bar',
                data: values,
                markPoint: {
                    data: [
                        {type: 'max', name: '最大值'},
                        {type: 'min', name: '最小值'}
                    ]
                },
                markLine: {
                    data: []
                }
            };
        }

        var dom = document.getElementById("container");
        var myChart = echarts.init(dom);
        var app = {};
        // Declared with var so it is no longer an implicit global.
        var option = {
            title: {
                text: '东部球队数据',
                subtext: 'NBA'
            },
            tooltip: {
                trigger: 'axis'
            },
            legend: {
                data: ['胜场(次)', '败场(次)', '胜率(百分比)']
            },
            toolbox: {
                show: true,
                feature: {
                    dataView: {show: true, readOnly: false},
                    magicType: {show: true, type: ['line', 'bar']},
                    restore: {show: true},
                    saveAsImage: {show: true}
                }
            },
            calculable: true,
            xAxis: [
                {
                    type: 'category',
                    data: ballgeme_list
                }
            ],
            yAxis: [
                {
                    type: 'value'
                }
            ],
            series: [
                barSeries('胜场(次)', winlist),
                barSeries('败场(次)', transportlist),
                barSeries('胜率(百分比)', winratelist)
            ]
        };
        if (option && typeof option === "object") {
            myChart.setOption(option, true);
        }
    </script>
{% endblock %}
{% block modeltable %}
    {# Standings table: one row per entry in GGGG, or a single fallback row when it is empty. #}
    <div id="title_data" align="left">
        <table width="1200px" border="2px" cellspacing="0" cellpadding="0">
            <tr>
                <td>排名</td>
                <td>球队logo</td>
                <td>球队</td>
                <td>胜场</td>
                <td>输场</td>
                <td>胜率</td>
            </tr>
            {% for data in GGGG %}
            <tr>
                <td>{{ data.ranking }}</td>
                <td width="25%">
                    <a href="{% url 'index:first' data.id %}">
                        <img src="{{ data.logopath }}" alt="{{ data.ballgame }}" style="width: 50px">
                    </a>
                </td>
                <td>{{ data.ballgame }}</td>
                <td>{{ data.win }}</td>
                <td>{{ data.transport }}</td>
                <td>{{ data.winrate }}</td>
            </tr>
            {% empty %}
            {# A <p> may not be a direct child of <table>, so the fallback sits in a full-width cell. #}
            <tr>
                <td colspan="6"><p>Not Datas</p></td>
            </tr>
            {% endfor %}
        </table>
    </div>
{% endblock %}
</body>
</html>
主要的操作就是按照 echarts.js 的规则填上所需的数据就可以啦,下面展示一下网页
页面简陋 勿喷 勿喷 ! 完成啦 !! 开心!!>_<!!
https://github.com/jingziren-GG/django-start
项目在这里