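# Some Shanghai Stock Exchange (SSE) annual reports cannot be retrieved from
# either the "Periodic Reports" column or the "Issuance and Listing
# Announcements" column, but they are available in the "Latest Announcements"
# column.
# This program crawls the latest-announcement data.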
import json
import requests
import datetime
def _strip_jsonp(payload, callback):
    """Return the JSON text wrapped by a ``callback(...)`` JSONP envelope.

    Only the leading ``callback(`` and the single trailing ``)`` are removed,
    so parentheses inside the JSON body are left untouched. The payload is
    returned as-is (minus surrounding whitespace) when it is not wrapped.
    """
    text = payload.strip()
    prefix = callback + '('
    if text.startswith(prefix) and text.endswith(')'):
        return text[len(prefix):-1]
    return text


def _annual_report_index(titles):
    """Return the index of the first full annual-report title, or None.

    A title qualifies when it contains '年度报告' (annual report) and does
    not contain '摘要' (summary).
    """
    for position, title in enumerate(titles):
        if '年度报告' in title and '摘要' not in title:
            return position
    return None


def _nth_value(record, key, index):
    """Split ``record[key]`` on '<br>' and return the element at *index*."""
    return str(record[key]).split('<br>')[index]


def noticeToAnnals(stock_code, START_DATE='2022-01-01', END_DATE=None):
    """Print the annual-report announcements found for one stock code.

    Sends a single JSONP query to ``query.sse.com.cn/commonQuery.do`` and, for
    every returned record whose title list contains a full annual report
    (not a summary), prints that entry's date, bulletin types, year, URL and
    title.

    Args:
        stock_code: Security code sent as ``SECURITY_CODE`` (e.g. '603993').
        START_DATE: Lower bound of the query window, 'YYYY-MM-DD'.
        END_DATE: Upper bound of the query window, 'YYYY-MM-DD'. ``None``
            (the default) means "today", resolved at call time rather than
            once at import time.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        ValueError: If the response body is not valid JSON/JSONP.

    Note:
        Only the first page (25 records) is requested.
    """
    if END_DATE is None:
        END_DATE = datetime.datetime.now().strftime('%Y-%m-%d')

    url = 'http://query.sse.com.cn/commonQuery.do'
    # Single source of truth for the JSONP callback name: it is sent in the
    # query and must be stripped from the response.
    callback = 'jsonpCallback72875491'
    headers = {
        'Referer': 'http://www.sse.com.cn',
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
    }
    params = {
        'jsonCallBack': callback,
        'isPagination': 'true',
        'pageHelp.pageSize': '25',
        'pageHelp.cacheSize': '1',
        'type': 'inParams',
        'sqlId': 'COMMON_PL_SSGSXX_ZXGG_L',
        'START_DATE': START_DATE,
        'END_DATE': END_DATE,
        'SECURITY_CODE': stock_code,
        'TITLE': '',
        # NOTE(review): '0101' presumably selects a bulletin category on the
        # server side -- confirm against the SSE site before changing.
        'BULLETIN_TYPE': '0101',
        'pageHelp.pageNo': '1',
        'pageHelp.beginPage': '1',
        'pageHelp.endPage': '1',
        '_': '1651221153479',
    }

    # A timeout keeps an unresponsive server from hanging the caller forever.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    response.raise_for_status()
    body = json.loads(_strip_jsonp(response.text, callback))
    # A missing or null 'result' is treated as "no announcements".
    records = body.get('result') or []

    for record in records:
        # Each field appears to hold several '<br>'-joined values that line up
        # by position across fields -- TODO confirm against the API.
        titles = str(record['TITLE']).split('<br>')
        index = _annual_report_index(titles)
        if index is None:
            # No full annual report in this record; nothing to print.
            continue
        print(
            f'SSEDATE:{_nth_value(record, "SSEDATE", index)}'
            f'\nOLD_BULLETIN_TYPE:{_nth_value(record, "OLD_BULLETIN_TYPE", index)}'
            f'\nBULLETIN_YEAR:{_nth_value(record, "BULLETIN_YEAR", index)}'
            f'\nORG_BULLETIN_TYPE:{_nth_value(record, "ORG_BULLETIN_TYPE", index)}'
            f'\nURL:{_nth_value(record, "URL", index)}'
            f'\nTITLE:{titles[index]}')
if __name__ == "__main__":
    # Script entry point: run the query for one sample security code.
    noticeToAnnals(stock_code='603993')