该文章仅供学习参考
数据平台为baostock,网址为:www.baostock.com
本文的行业csv文件获取需要参考上一篇文章:
《1-PYTHON-BAOSTOCK-A股上市公司行业分类信息》
https://www.jianshu.com/p/2412417ae0a3
1.修改工作路径
import os
# os.getcwd() 获取当前工作路径
os.chdir(r'E:\行业分析')
os.getcwd()
2.读取以前保存的csv文件,比如选择“银行业”作为分析对象
我们可以将dataframe的code列存成列表,方便后续遍历取数据
#将股票代码存进列表中方便后续遍历
import pandas as pd
df=pd.read_csv(r'E:\行业分析\各行业股票代码\银行.csv')
code_list=list(df['code'])
# code_list
3.获取数据-假设我们研究的是2019年度的年报财务数据,可能需要等待一些时间
import baostock as bs
#设置日期:下面的日期可以换成year=date_list[0],quarter=date_list[1]
date_list=[2019,4]
# 登陆系统
lg = bs.login()
# 显示登陆返回信息
print('login respond error_code:'+lg.error_code)
print('login respond error_msg:'+lg.error_msg)
# 1查询季频估值指标盈利能力
profit_list = []
for i in code_list:
rs_profit = bs.query_profit_data(code=i, year=date_list[0], quarter=date_list[1])
while (rs_profit.error_code == '0') & rs_profit.next():
profit_list.append(rs_profit.get_row_data())
result_profit = pd.DataFrame(profit_list, columns=rs_profit.fields)
# 2营运能力
operation_list = []
for i in code_list:
rs_operation = bs.query_operation_data(code=i, year=date_list[0], quarter=date_list[1])
while (rs_operation.error_code == '0') & rs_operation.next():
operation_list.append(rs_operation.get_row_data())
result_operation = pd.DataFrame(operation_list, columns=rs_operation.fields)
# 3偿债能力
balance_list = []
for i in code_list:
rs_balance = bs.query_balance_data(code=i, year=date_list[0], quarter=date_list[1])
while (rs_balance.error_code == '0') & rs_balance.next():
balance_list.append(rs_balance.get_row_data())
result_balance = pd.DataFrame(balance_list, columns=rs_balance.fields)
# 4成长能力
growth_list = []
for i in code_list:
rs_growth = bs.query_growth_data(code=i, year=date_list[0], quarter=date_list[1])
while (rs_growth.error_code == '0') & rs_growth.next():
growth_list.append(rs_growth.get_row_data())
result_growth = pd.DataFrame(growth_list, columns=rs_growth.fields)
# 5季频现金流量
cash_flow_list = []
for i in code_list:
rs_cash_flow = bs.query_cash_flow_data(code=i, year=date_list[0], quarter=date_list[1])
while (rs_cash_flow.error_code == '0') & rs_cash_flow.next():
cash_flow_list.append(rs_cash_flow.get_row_data())
result_cash_flow = pd.DataFrame(cash_flow_list, columns=rs_cash_flow.fields)
# 6查询杜邦指数
dupont_list = []
for i in code_list:
rs_dupont = bs.query_dupont_data(code=i, year=date_list[0], quarter=date_list[1])
while (rs_dupont.error_code == '0') & rs_dupont.next():
dupont_list.append(rs_dupont.get_row_data())
result_dupont = pd.DataFrame(dupont_list, columns=rs_dupont.fields)
# 打印输出
# print(result_balance)
# 结果集输出到csv文件
result_profit.to_csv(r"E:\行业分析\行业财务数据获取\profit_data.csv", encoding="gbk", index=False)
result_operation.to_csv(r"E:\行业分析\行业财务数据获取\operation_data.csv", encoding="gbk", index=False)
result_balance.to_csv(r"E:\行业分析\行业财务数据获取\balance_data.csv", encoding="gbk", index=False)
result_growth.to_csv(r"E:\行业分析\行业财务数据获取\growth_data.csv", encoding="gbk", index=False)
result_cash_flow.to_csv(r"E:\行业分析\行业财务数据获取\cash_flow_data.csv", encoding="gbk", index=False)
result_dupont.to_csv(r"E:\行业分析\行业财务数据获取\dupont_data.csv", encoding="gbk", index=False)
#结果集输出到excel文件
result_profit.to_excel(r"E:\行业分析\行业财务数据获取\profit_data.xlsx", encoding="gbk", index=False)
result_operation.to_excel(r"E:\行业分析\行业财务数据获取\operation_data.xlsx", encoding="gbk", index=False)
result_balance.to_excel(r"E:\行业分析\行业财务数据获取\balance_data.xlsx", encoding="gbk", index=False)
result_growth.to_excel(r"E:\行业分析\行业财务数据获取\growth_data.xlsx", encoding="gbk", index=False)
result_cash_flow.to_excel(r"E:\行业分析\行业财务数据获取\cash_flow_data.xlsx", encoding="gbk", index=False)
result_dupont.to_excel(r"E:\行业分析\行业财务数据获取\dupont_data.xlsx", encoding="gbk", index=False)
# 登出系统
bs.logout()
4.利用pandas的concat函数可以将这些数据横向连接到一张表内,可以选择后续用pandas处理,或者在excel表格中处理数据。
all_financial_info=pd.concat([result_profit,result_operation,result_balance,result_growth,result_cash_flow,result_dupont],axis=1)
# all_financial_info
## 结果集输出到csv文件
all_financial_info.to_csv(r"E:\行业分析\行业财务数据获取\all_financial_info.csv", encoding="gbk", index=False)
#结果集输出到excel文件
all_financial_info.to_excel(r"E:\行业分析\行业财务数据获取\all_financial_info.xlsx", encoding="gbk", index=False)