一. 大致流程如下
股票历史数据
股票技术指标
股票宏观指标
股吧文本数据
1.特征选择
2.股吧文本情感分析
2.1 文本数据预处理
2.2 基于LSTM的情感分析模型
2.3 处理结果
参考代码
https://github.com/Edward1Chou/SentimentAnalysis
3.趋势预测
####################load data
def load_data(df, sequence_length=10, split=0.8):
    """Turn a feature table into sliding-window train/test arrays.

    Every column except the last is min-max scaled. Windows of
    ``sequence_length + 1`` consecutive rows are then cut from the table:
    the first ``sequence_length`` rows of a window are the input sample and
    the last column of the window's final row is the target. Targets are
    scaled with a separate ``MinMaxScaler``, which is returned to the caller.

    Note that the input windows keep *all* columns, including the last
    column, which is not scaled by the feature scaler.

    Args:
        df: Table supporting ``astype(float)`` (e.g. a DataFrame); the last
            column supplies the target values.
        sequence_length: Number of consecutive rows in each input sample.
        split: Fraction of the windows (taken in order, no shuffling) that
            goes to the training set; the remainder is the test set.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y, scaler)`` where ``scaler``
        is the ``MinMaxScaler`` fitted on the targets.

    Raises:
        ValueError: If ``df`` has fewer than ``sequence_length + 1`` rows,
            so that not even one window can be built.
    """
    # Convert to float.
    data_all = np.array(df.astype(float))

    window = sequence_length + 1
    # The last valid window starts at len(data_all) - window, so there are
    # len(data_all) - window + 1 windows in total (the previous bound
    # dropped the most recent one).
    n_windows = len(data_all) - window + 1
    if n_windows < 1:
        raise ValueError(
            "load_data needs at least sequence_length + 1 = %d rows, got %d"
            % (window, len(data_all))
        )

    # Normalize every feature column (all but the last).
    # NOTE(review): both scalers are fitted on the full table before the
    # train/test split, so test-set statistics influence the scaling.
    x_scaler = MinMaxScaler()
    data_all[:, :-1] = x_scaler.fit_transform(data_all[:, :-1])

    reshaped_data = np.array(
        [data_all[i: i + window] for i in range(n_windows)],
        dtype='float64',
    )

    x = reshaped_data[:, :-1]
    y = reshaped_data[:, -1][:, -1]

    scaler = MinMaxScaler()
    y = scaler.fit_transform(y.reshape(-1, 1))

    split_boundary = int(reshaped_data.shape[0] * split)
    train_x = x[:split_boundary]
    test_x = x[split_boundary:]
    train_y = y[:split_boundary]
    test_y = y[split_boundary:]

    return train_x, train_y, test_x, test_y, scaler
def build_model(layers):
    """Assemble and compile a stacked two-layer LSTM regression network.

    The network is LSTM -> Dropout(0.5) -> LSTM -> Dropout(0.4) -> Dense
    with a linear activation, compiled with MSE loss and the RMSprop
    optimizer.

    Args:
        layers: Indexable of at least four ints:
            ``layers[0]`` features per time step (input width),
            ``layers[1]`` units of the first LSTM,
            ``layers[2]`` units of the second LSTM,
            ``layers[3]`` units of the final Dense layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # return_sequences=True makes the first LSTM emit its full output
    # sequence so that a second LSTM layer can be stacked on top of it.
    # Units are passed positionally (as the second LSTM already does) and
    # the input is declared through input_shape; the legacy
    # input_dim=/output_dim= keywords are not accepted by current Keras.
    model.add(LSTM(
        layers[1],
        input_shape=(None, layers[0]),
        return_sequences=True))
    model.add(Dropout(0.5))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.4))

    model.add(Dense(layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    # Mean absolute error is reported instead of 'accuracy': accuracy is a
    # classification metric and carries no information for a continuous
    # regression target trained with MSE.
    model.compile(loss="mse", optimizer="rmsprop", metrics=['mae'])
    print("Compilation Time : ", time.time() - start)
    return model
def train_model(train_x, train_y, test_x, test_y, epochs=200, batch_size=40):
    """Train the LSTM model, predict on the test inputs and plot the result.

    Training can be stopped early with Ctrl-C; the partially trained model
    is then still used for prediction, and the predictions and true values
    are printed.

    Args:
        train_x: Training inputs; ``train_x.shape[2]`` is used as the input
            width and ``train_x.shape[1]`` as the first LSTM's unit count.
        train_y: Training targets.
        test_x: Inputs to predict on after training.
        test_y: True targets for ``test_x``; plotted and returned unchanged.
        epochs: Number of training epochs (default 200).
        batch_size: Mini-batch size (default 40).

    Returns:
        Tuple ``(predict, test_y, model)`` where ``predict`` is the
        flattened 1-D array of model predictions for ``test_x``.
    """
    model = build_model([train_x.shape[2], train_x.shape[1], 200, 1])

    interrupted = False
    try:
        model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs,
                  validation_split=0.1)
    except KeyboardInterrupt:
        # Keep whatever weights have been learned so far instead of failing
        # later on an undefined prediction.
        interrupted = True

    # Predict outside the try block so that `predict` is always bound,
    # whether or not training was interrupted.
    predict = model.predict(test_x)
    predict = np.reshape(predict, (predict.size, ))

    if interrupted:
        print(predict)
        print(test_y)

    try:
        plt.figure(1)
        plt.plot(predict, 'r:')
        plt.plot(test_y, 'g-')
        plt.legend(['predict', 'true'])
    except Exception as e:
        # Plotting is best-effort (e.g. no display backend available); a
        # plotting failure must not discard the trained model.
        print(e)

    return predict, test_y, model
本文代码已上传Github
https://github.com/tutan123/StockTrendPrediction
发表论文: