每月几千笔不重复银行转账业务,每笔跨行转账都需要狗屁的联行号,很无奈啊
话说手机转账都不需要这玩意,干嘛非要跟工作电脑过不去...
爬取联行号并保存为 CSV 文件(可用 Excel 打开),简陋,但已经可以用了,未完成...
import lxml
import csv
from bs4 import BeautifulSoup
import requests
# Minimal scraper: collect (bank number, bank name) pairs from the first two
# Nanjing listing pages and save them to numbank.csv.

# Browser-like User-Agent header sent with every request.
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}
listbank = []
for i in range(1, 3):
    url = 'http://www.5cm.cn/bank/nanjing/' + str(i) + '/'
    # A timeout keeps a stalled connection from hanging the script forever.
    html = requests.get(url, headers=header, timeout=10).text
    soup = BeautifulSoup(html, 'lxml')  # parse the fetched page
    banklist = soup.find_all('tr')[1:]  # the first <tr> is skipped
    for row in banklist:
        print(row)
        cells = row.find_all('td')
        if len(cells) < 2:
            # Rows without both columns cannot yield a number/name pair.
            continue
        banknum = cells[0].text
        bankname = cells[1].text
        # More columns can be collected the same way, e.g. cells[2].text.
        listbank.append([banknum, bankname])
print(listbank)
# The context manager guarantees the CSV is flushed and closed; the original
# `file.close` lacked parentheses and therefore never closed the file.
with open('numbank.csv', 'w', newline='') as file:
    csvwriter = csv.writer(file)
    csvwriter.writerows(listbank)
爬取完整信息存档
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import lxml
import csv
from bs4 import BeautifulSoup
import requests
import time
import random
from retrying import retry
# Full scraper: walk the listing pages of every province in `citys` and save
# number / name / phone / address / province rows to backnum.csv.

# Browser-like User-Agent header sent with every request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}
listbank = []
citys = ['jiangsu','guangdong','shandong','hebei','zhejiang','fujian','liaoning','anhui','hubei','sichuan','shanxisheng','hunan','shanxi','guizhou','henan','heilongjiang','jilin','xinjiang','shanghai','gansu','yunnan','beijing','neimenggu','tianjin','jiangxi','chongqing','guangxi','ningxia','hainan','qinghai','xianggang','xicang','aomen']


# Defined once (not once per page): up to 3 attempts, 3000 ms between attempts.
@retry(stop_max_attempt_number=3, wait_fixed=3000)
def get_request(url):
    """Fetch one listing page and return its rows.

    Each returned row is ``[banknum, bankname, banktel, bankadr]``. Rows are
    returned instead of being appended to ``listbank`` directly, so a retry
    after a mid-page failure cannot leave duplicated records behind.
    """
    html = requests.get(url, headers=headers, timeout=3).text
    soup = BeautifulSoup(html, 'lxml')
    rows = []
    for row in soup.find_all('tr')[1:]:  # the first <tr> is skipped
        cells = row.find_all('td')
        if len(cells) < 5:
            # Column index 4 is read below; shorter rows are skipped.
            continue
        rows.append([cells[0].text, cells[1].text, cells[2].text, cells[4].text])
    return rows


# Open the output once for the whole run; the context manager closes it even
# if a page finally fails (the original `file.close` was never called).
with open('backnum.csv', 'w', newline='') as file:
    csvwriter = csv.writer(file)
    # idx is the 1-based province counter, used only for progress output.
    for idx, i in enumerate(citys, 1):
        city_rows = []
        for j in range(1, 333):  # pages 1..332 of this province
            url = 'http://www.5cm.cn/bank/' + str(i) + '/' + str(j) + '/'
            page_rows = get_request(url)
            print('正在抓取:' + str(idx) + '/33的' + str(i) + '省,' + '进度: {:.2%}'.format(j/333))
            city_rows.extend(row + [str(i)] for row in page_rows)
        listbank.extend(city_rows)
        if listbank:
            # Show the most recent record as a sample; guarded because
            # nothing may have been scraped yet.
            print(str(listbank[-1][:3]))
        print('···省略显示···若干记录···')
        # Random pause between provinces, rendered as a progress bar.
        pause = random.randint(3, 33)  # total pause in seconds
        interval = 0.3  # screen refresh interval in seconds
        steps = int(pause / interval) + 1
        for y in range(steps):
            print("\r休息一会:" + "|" + "*"*y + " "*(steps - y) + "|" + '体力恢复: {:.0f}%'.format(y/(pause/interval)*100), end="")
            time.sleep(interval)
        print("\r满血复活!")
        time.sleep(1)
        # Write this province's rows now so finished work survives a later failure.
        csvwriter.writerows(city_rows)
        file.flush()
        print('正在写入到EXCEL···请稍候···')
        print('---')
print('写入完成,文件名backnum.csv,收工!')
pyinstaller打包联行号查询工具(cmd 版)
#!/usr/bin/env python
import os
import sys
def resource_path(relative_path):
    """Return the path of a bundled resource file.

    The base directory is ``sys._MEIPASS`` when that attribute exists,
    otherwise the directory containing this script; ``relative_path`` is
    joined onto it.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    bundle_dir = getattr(sys, '_MEIPASS', script_dir)
    return os.path.join(bundle_dir, relative_path)
# os.startfile(resource_path('banks.txt'))
# pyinstaller -F --add-data banks.txt;. banknum.py
# Read the whole bank list up front. The context manager releases the file
# handle immediately instead of holding it open for the entire session;
# `banks` stays bound, and closing it again later is a harmless no-op.
with open(resource_path('banks.txt'), 'r', encoding='utf-8') as banks:
    lines = banks.readlines()
def inputx():
    """Run the main menu until the user chooses to exit.

    Option ``1`` starts a keyword query, option ``3`` closes the data file
    and exits via ``sys.exit()``; anything else prints a hint and shows the
    menu again. A loop replaces the original self/mutual recursion, so a long
    session can no longer hit the interpreter's recursion limit.
    """
    while True:
        print('--------------------------------------------')
        x = input('1.查询银行名称或行号\n3.退出\n$ 请输入序号数字后回车:')
        if x == '1':
            inputy()
        elif x == '3':
            banks.close()
            sys.exit()
        else:
            print('--------------------------------------------')
            print('# 注意:输入错误,请按屏幕提示操作!')


def inputy():
    """Prompt until a non-empty keyword is entered, then run the query."""
    while True:
        print('--------------------------------------------')
        y = input('$ 输入查询关键字后回车:')
        if y != '':
            break
        print('--------------------------------------------')
        print('# 注意:关键字不能为空,请重新输入!')
    print('--------------------------------------------')
    print('关键字' + '<' + y + '>' + '查询中,请稍后!')
    print('--------------------------------------------')
    print('# 注意:查询结果为空将不显示任何信息!')
    selecty(y)


def selecty(y):
    """Print every line of the bank list that contains the keyword ``y``.

    Nothing is printed when no line matches. Control simply returns to the
    caller; the menu loop in ``inputx`` shows the menu again.
    """
    for line in lines:
        line = line.strip()
        if y in line:
            print(line)
if __name__ == '__main__':
    # Run the console `mode con` command to set a 150x30 window, print the
    # banner, then hand control to the menu.
    os.system("mode con cols=150 lines=30")
    for banner_line in (
        '--------------------------------------------',
        '- 联行号查询工具 - by lala 2020 ',
    ):
        print(banner_line)
    inputx()
pyinstaller打包联行号查询工具(TK 界面版)
# ! /usr/bin/env python
# encoding:utf-8
import os
import sys
from tkinter import *
from tkinter import ttk
def resource_path(relative_path):
    """Resolve ``relative_path`` against the resource base directory.

    Uses ``sys._MEIPASS`` when it is set and falls back to the directory of
    this script otherwise.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    root_dir = getattr(sys, '_MEIPASS', here)
    return os.path.join(root_dir, relative_path)
# Load the bank list into memory once; the context manager closes the file
# right after reading (the original handle was never closed).
with open(resource_path('banks.txt'), 'r', encoding='utf-8') as banks:
    lines = banks.readlines()
def statusx(*args):
    """Clear the search entry and give it keyboard focus.

    Accepts and ignores any positional arguments so it can be used as a Tk
    callback. Tk errors are swallowed on purpose (best effort).
    """
    try:
        loader_entry.delete(0, 'end')
        loader_entry.focus()
    except TclError:
        # The original caught an undefined name (`searchxError`), which would
        # itself raise NameError; TclError is what the Tk calls can raise.
        pass
def clsx(*args):
    """Clear the search entry, refocus it and update the status line.

    Accepts and ignores any positional arguments so it can be used as a Tk
    callback. Tk errors are swallowed on purpose (best effort).
    """
    try:
        txtout.configure(state='normal')
        try:
            loader_entry.delete(0, 'end')
            loader_entry.focus()
            meters.set('输入框已清空,请指示.')
        finally:
            # Always put the output area back into read-only mode.
            txtout.configure(state='disabled')
    except TclError:
        # The original caught an undefined name (`searchxError`), which would
        # itself raise NameError; TclError is what the Tk calls can raise.
        pass
def likes(*args):
    """Insert the `photo` image at the top of the output area.

    Also updates the status line. Accepts and ignores any positional
    arguments so it can be used as a Tk callback. Tk errors are swallowed on
    purpose (best effort).
    """
    try:
        txtout.configure(state='normal')
        try:
            txtout.insert('1.0', '\n')
            txtout.image_create('1.0', image=photo)
            meters.set('用支付宝或微信打赏我一杯咖啡好吗.(^_^)♪')
            txtout.insert('1.0', '\n')
        finally:
            # Always put the output area back into read-only mode.
            txtout.configure(state='disabled')
    except TclError:
        # The original caught an undefined name (`searchxError`), which would
        # itself raise NameError; TclError is what the Tk calls can raise.
        pass
def queryx(*args):
    """Search the bank list for the entered keyword and show the matches.

    An empty keyword shows the usage text and a status hint. Otherwise every
    line of ``lines`` containing the keyword is inserted at the top of the
    output area, each occurrence of the keyword is tagged with ``tagx``, and
    the usage text is appended at the end. Accepts and ignores any positional
    arguments so it can serve as both a button command and a key binding.
    Tk errors are swallowed on purpose (best effort).
    """
    try:
        txtout.configure(state='normal')
        try:
            txtout.delete('1.0', 'end')
            searchx = loader.get()
            if searchx == '':
                txtout.insert('1.0', msg)
                meters.set('查询关键字不能为空,请重新输入.')
                return

            found = False
            for line in lines:
                line = line.strip()
                if searchx in line:
                    found = True
                    txtout.insert('1.0', '------------\n' + line + '\n')
            if found:
                # Set the status once rather than once per matching line.
                meters.set('关键字' + ' [' + searchx + '] ' + '查询已完成,双击内容后按Ctrl+C复制.')

            # Tag every occurrence of the keyword in the results.
            match_len = len(searchx)
            start = '1.0'  # Text indices are strings, not floats
            while True:
                pos = txtout.search(searchx, start, stopindex='end')
                if not pos:
                    break
                txtout.tag_add('tagx', pos, '{}+{}c'.format(pos, match_len))
                start = pos + '+1c'  # continue from the next character
            txtout.insert('end', msg + '\n')
        finally:
            # Always put the output area back into read-only mode, including
            # on the early return above and when a Tk call fails.
            txtout.configure(state='disabled')
    except TclError:
        # The original caught an undefined name (`searchxError`), which would
        # itself raise NameError; TclError is what the Tk calls can raise.
        pass
# Main window and the toolbar row (status label, search entry, buttons).
root = Tk()
root.title('联行号查询工具 v1.3')
mainframe = ttk.Frame(root, padding='12 6 12 12')
mainframe.grid(column=0, row=0, sticky='nwes')
# root.columnconfigure(0, weight=1)
# root.rowconfigure(0, weight=1)
root.resizable(0,0)

# Status line: a fixed caption followed by the text held in `meters`.
# Create and grid in separate steps; `.grid()` returns None, so the original
# one-liners bound `loader_label` to None instead of a widget.
meters = StringVar()
status_caption = ttk.Label(mainframe, text='当前状态:')
status_caption.grid(column=1, row=1, sticky='w')
loader_label = ttk.Label(mainframe, textvariable=meters)
loader_label.grid(column=1, row=1, sticky='w', padx=60)
meters.set('初始化完成,数据版本202012,请指示.')

# Search entry and buttons share grid cell (3, 1); the differing padx values
# offset them from one another.
loader = StringVar()
loader_entry = ttk.Entry(mainframe, width=23, textvariable=loader)
loader_entry.grid(column=3, row=1, sticky='e', padx=160)
ttk.Button(mainframe, text='←', width=3, command=clsx).grid(column=3, row=1, sticky='e', padx=125)
ttk.Button(mainframe, text='¥.', width=3, command=likes).grid(column=3, row=1, sticky='e', padx=90)
ttk.Button(mainframe, text='查询', command=queryx).grid(column=3, row=1, sticky='e')

# Image inserted into the output area by `likes`; kept in a module-level
# name so the reference stays alive.
photo = PhotoImage(file=resource_path('like.png'))
# Banner (ASCII art) plus usage notes shown in the output area.
# Backslashes in the art are written as `\\`: the original relied on invalid
# escape sequences (`\_`, `\ `, `\|`, `\/`), which produce a SyntaxWarning on
# Python 3.12+. The runtime string is unchanged.
msg = (
    '\n'
    ' .__ .__ \n'
    ' | | _____ | | _____ \n'
    ' | | \\__ \\ | | \\__ \\ \n'
    ' | |__/ __ \\| |__/ __ \\_\n'
    ' |____(____ /____(____ /\n'
    ' \\/ \\/ \n'
    '------------\n'
    '# 使用说明:\n'
    '------------\n'
    ' 输入银行名称或行号点击查询按钮或回车。双击查询结果,Ctrl+C复制,CTRL+V粘贴,查询结果为空将不显示任何内容。\n'
    '-------------------------\n'
    '@ 2020 by lala q.33818121\n'
)
# Output area: starts with the usage text and is kept read-only; the `tagx`
# tag colours text red.
txtout = Text(mainframe, width=133, height=33)
txtout.grid(column=1, row=2, columnspan=3, sticky='nwes')
txtout.tag_configure('tagx', foreground='red')
txtout.insert('1.0', msg)
txtout.configure(state='disabled')

# Vertical scrollbar wired to the output area in both directions.
sbar = ttk.Scrollbar(mainframe, orient='vertical', command=txtout.yview)
sbar.grid(column=4, row=2, sticky='ns')
txtout.configure(yscrollcommand=sbar.set)

# Give every widget in the frame the same vertical outer padding.
for child in mainframe.winfo_children():
    child.grid_configure(pady=5)

root.bind('<Return>', queryx)  # the Return key triggers a query
loader_entry.focus()  # start with the cursor in the search entry
root.mainloop()
# pyinstaller -F -w --add-data banks.txt;. --add-data like.png;. bankttk.py
下一步计划,批量付款明细用excel公式根据城市信息自动匹配银行行号