注:这是我参加招行 FinTech 精英训练营金融工程课程时跟着做的笔记,代码是在 PyCharm 上编写的。
里面用到的股票数据均来自雅虎财经(https://finance.yahoo.com/),数据下载方法我在(一)中有介绍。
量化交易基础:使用 Python 处理金融数据
01-06 直方图和散点图(histograms and scatter plots)
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for the given ticker symbol.

    Args:
        symbol: Ticker symbol (e.g. "SPY"); converted with ``str()``.
        base_dir: Directory that holds the per-symbol CSV files.

    Returns:
        The path ``<base_dir>/<symbol>.csv`` built with ``os.path.join``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols; each one is resolved to a CSV
            file through ``symbol_to_path``.
        dates: Index (for example a ``pd.date_range``) used as the index of
            the returned frame.

    Returns:
        A DataFrame indexed by ``dates`` with one column per symbol holding
        that symbol's "Adj Close" values. If "SPY" is among the symbols,
        rows where the SPY column is missing are dropped.
    """
    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            index_col="Date",
            parse_dates=True,
            usecols=["Date", "Adj Close"],
            na_values=["nan"],
        )
        # Name the price column after the symbol so joined columns stay distinct.
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        # DataFrame.join defaults to a left join, so only the requested dates remain.
        df = df.join(df_temp)
        if symbol == "SPY":
            # Drop dates SPY did not trade: remove rows whose "SPY" value is missing.
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices"):
    """Plot the columns of ``df`` and display the figure.

    Args:
        df: DataFrame to plot; its index becomes the x axis.
        title: Title shown above the chart.
    """
    # fontsize controls the tick-label size; the previous value of 2 points
    # produced unreadable labels.
    ax = df.plot(title=title, fontsize=12)
    # The frames built by get_data in this file are indexed by date, so the
    # x axis is labelled "Date" (the earlier "Data" was a typo).
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()  # must be called to show plots in some environments
def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame to smooth.
        window: Window argument passed to ``values.rolling``.

    Returns:
        An object of the same shape as ``values``; with an integer window the
        first ``window - 1`` entries are NaN because the window is not full.
    """
    return values.rolling(window).mean()
def get_rolling_std(values, window):
    """Return rolling standard deviation of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame.
        window: Window argument passed to ``values.rolling``.

    Returns:
        An object of the same shape as ``values``; with an integer window the
        first ``window - 1`` entries are NaN because the window is not full.
    """
    return values.rolling(window).std()
def get_bollinger_bands(rm, rstd, num_std=2):
    """Return upper and lower Bollinger Bands.

    Args:
        rm: Rolling mean (scalar, Series or DataFrame).
        rstd: Rolling standard deviation, same shape as ``rm``.
        num_std: Number of standard deviations between the mean and each
            band. Defaults to 2, the width this function always used before
            the parameter existed.

    Returns:
        A tuple ``(upper_band, lower_band)`` equal to
        ``rm + num_std * rstd`` and ``rm - num_std * rstd``.
    """
    band_width = num_std * rstd
    upper_band = rm + band_width
    lower_band = rm - band_width
    return upper_band, lower_band
def compute_daily_returns(df):
    """Compute and return the daily return values.

    The return for each row is ``price / previous_price - 1``. The first row
    has no previous price, so its return is set to 0 instead of NaN.

    Args:
        df: pandas DataFrame (or Series) of prices, one row per period.

    Returns:
        A new object with the same index and columns as ``df``; the input is
        not modified. An empty input yields an empty result.
    """
    # Dividing by the frame shifted down one row pairs every price with the
    # previous row's price; the result is a fresh object, so no copy of df
    # is needed beforehand.
    daily_returns = (df / df.shift(1)) - 1
    if len(daily_returns) > 0:
        # Row 0 has no predecessor and would otherwise be NaN. Indexing with
        # iloc[0] works for both DataFrame and Series inputs.
        daily_returns.iloc[0] = 0
    return daily_returns
def test_run():
    """Demo: scatter-plot 2010 daily returns against SPY and print correlations.

    Loads SPY, XOM and GLD through ``get_data``, converts prices to daily
    returns, shows a SPY-vs-XOM scatter plot with a fitted straight line,
    shows a SPY-vs-GLD scatter plot, and prints the Pearson correlation
    matrix of the returns.
    """
    # Alternative inputs tried earlier:
    # dates = pd.date_range('2010-01-01', '2012-12-31')
    # symbols = ['SPY']
    # symbols = ['SPY', 'XOM']
    dates = pd.date_range('2010-01-01', '2010-12-31')
    symbols = ['SPY', 'XOM', 'GLD']
    df = get_data(symbols, dates)
    daily_returns = compute_daily_returns(df)

    # --- Histogram experiments (disabled; uncomment to run) ---
    # One histogram per column, drawn as separate subplots (bins=10 by default):
    # daily_returns.hist(bins=20)
    # Two histograms on the same chart:
    # daily_returns['SPY'].hist(bins=20, label="SPY")
    # daily_returns['XOM'].hist(bins=20, label="XOM")
    # plt.legend(loc='upper right')
    # plt.show()
    # Mean and standard deviation of SPY, marked with vertical lines:
    # mean = daily_returns['SPY'].mean()
    # print("mean=", mean)
    # std = daily_returns['SPY'].std()
    # print("standard deviation=", std)
    # plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
    # plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
    # plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
    # plt.show()
    # print(daily_returns.kurtosis())

    # Scatter plot SPY vs XOM; keep the Axes so the fit line is drawn on it
    # explicitly rather than on whatever the current axes happen to be.
    ax = daily_returns.plot(kind='scatter', x='SPY', y='XOM')
    # Remove rows with missing values before fitting.
    daily_returns = daily_returns.dropna()
    # np.polyfit(x, y, 1) returns the coefficients highest power first:
    # slope (beta) and then intercept (alpha).
    beta_XOM, alpha_XOM = np.polyfit(daily_returns['SPY'], daily_returns['XOM'], 1)
    ax.plot(
        daily_returns['SPY'],
        beta_XOM * daily_returns['SPY'] + alpha_XOM,
        '-',
        color='r',
    )
    plt.show()

    # Scatter plot SPY vs GLD
    daily_returns.plot(kind='scatter', x='SPY', y='GLD')
    plt.show()

    # Calculate correlation coefficients
    print(daily_returns.corr(method='pearson'))
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, not on import.
    test_run()