This article walks through time-series forecasting with a seq2seq + attention model architecture. Each row of the dataset holds a code, a date, and several numeric feature columns.
1. Importing Packages and Data
My TensorFlow version is 2.7, and my Keras version is 2.7.
import csv
import numpy as np
from keras import backend as K
from keras.models import Model
from keras.layers import LSTM, Dense, Activation, Lambda, RepeatVector, Input, Reshape, Concatenate, Dot
from sklearn.preprocessing import MinMaxScaler
import os
import errno
import math
def file_processing(file_path, encode=None):
    data = []
    with open(file_path, encoding=encode) as file:
        rows = csv.reader(file, delimiter=",")
        n_row = 0
        for row in rows:
            if n_row != 0:  # skip the header row
                # column -> 0: code, 1: date; columns 2+ are numeric features
                for column in range(2, len(row)):
                    data[n_row - 1].append(float(row[column].strip()))
            data.append([])
            n_row += 1
    # the loop leaves one trailing empty list; drop it
    del data[-1]
    return np.array(data)
data = file_processing('20180504_50.csv')
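If you don't have 20180504_50.csv at hand, a tiny stand-in file with the same assumed layout (a code column, a date column, then numeric feature columns) lets you exercise file_processing. The column names and values below are made up:

rows = [
    ['code', 'date', 'open', 'high', 'low', 'close', 'volume'],  # hypothetical header
    ['510050', '2018-01-02', '2.85', '2.89', '2.84', '2.88', '1200'],
    ['510050', '2018-01-03', '2.88', '2.91', '2.86', '2.90', '1350'],
]
with open('demo.csv', 'w', newline='') as f:
    csv.writer(f).writerows(rows)
print(file_processing('demo.csv'))  # -> a (2, 5) float array; header and first two columns dropped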
2. Preparing the Dataset
# Min-max normalization. Note that the scaler is fitted on the full series,
# so validation statistics leak into training; fitting on the training split
# only would be stricter. (feature_len is unused here.)
def normalize_data(data, scaler, feature_len):
    minmaxscaler = scaler.fit(data)
    normalize_data = minmaxscaler.transform(data)
    return normalize_data
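A quick round-trip check of the scaler (a minimal sketch with made-up numbers):

demo = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
sc = MinMaxScaler(feature_range=(0, 1))
scaled = normalize_data(demo, sc, feature_len=2)
print(scaled)                        # each column rescaled to [0, 1]
print(sc.inverse_transform(scaled))  # recovers the original values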
# Slice the series into supervised samples: each sample pairs time_step input
# rows with the after_day rows that follow. Note that validate_percent is
# actually the fraction assigned to training; the remainder becomes validation.
def load_data(data, time_step=20, after_day=1, validate_percent=0.67):
    seq_length = time_step + after_day
    result = []
    for index in range(len(data) - seq_length + 1):
        result.append(data[index: index + seq_length])
    result = np.array(result)
    print('total data: ', result.shape)
    train_size = int(len(result) * validate_percent)
    train = result[:train_size, :]
    validate = result[train_size:, :]
    x_train = train[:, :time_step]
    y_train = train[:, time_step:]
    x_validate = validate[:, :time_step]
    y_validate = validate[:, time_step:]
    return [x_train, y_train, x_validate, y_validate]
time_step = 20               # each input sample covers 20 consecutive timesteps
feature_len = data.shape[1]  # number of feature columns to predict
scaler = MinMaxScaler(feature_range=(0, 1))  # min-max normalization
validate_percent = 0.8       # training fraction; the train/validation split is 8:2
after_day = 6                # forecast horizon: the next 6 days
# Normalize the data
data = normalize_data(data, scaler, feature_len)
# Use the last time_step rows of history as the test input
x_test = data[-time_step:]
x_test = np.reshape(x_test, (1, x_test.shape[0], x_test.shape[1]))
# Build the training and validation sets
x_train, y_train, x_validate, y_validate = load_data(
    data, time_step=time_step, after_day=after_day, validate_percent=validate_percent)
# 20 input timesteps per sample, 6 target timesteps, 5 columns; sliding the
# window one step at a time yields 1025 training samples:
# x_train.shape, y_train.shape: (1025, 20, 5) (1025, 6, 5)
# x_validate.shape, y_validate.shape: (257, 20, 5) (257, 6, 5)
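To verify those shapes without the real CSV, you can feed load_data a random series of the same assumed size (1307 rows × 5 columns):

dummy = np.random.rand(1307, 5)
x_tr, y_tr, x_va, y_va = load_data(dummy, time_step=20, after_day=6, validate_percent=0.8)
print(x_tr.shape, y_tr.shape)  # (1025, 20, 5) (1025, 6, 5)
print(x_va.shape, y_va.shape)  # (257, 20, 5) (257, 6, 5)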
3. Building the Model
We build the forecasting model with a seq2seq + attention architecture.
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x: Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
# Custom loss: only feature column 3 contributes to the training objective
def mean_squared_error(y_true, y_pred):
    return K.mean(K.square(y_pred[:, :, 3] - y_true[:, :, 3]), axis=-1)
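Since this loss scores only column 3, the other columns are trained only indirectly through the shared decoder. If you instead want the loss over all columns (an alternative, not what the original uses), a standard variant is:

def mean_squared_error_all(y_true, y_pred):
    # MSE over every feature column and forecast day
    return K.mean(K.square(y_pred - y_true))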
def one_step_attention(a, s_prev, repeator, concatenator, densor, activator, dotor):
    # Repeat s_prev once per encoder timestep
    s_prev = repeator(s_prev)
    # Concatenate the encoder hidden states with s_prev
    concat = concatenator([s_prev, a])
    e = densor(concat)
    alphas = activator(e)
    # Weighted sum of the encoder states gives the context vector
    context = dotor([alphas, a])
    return context
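For reference, the tensor shapes through one attention step (batch size b, time_step=20, 100 encoder units):

# a:       (b, 20, 100)  encoder hidden states, one per input timestep
# s_prev:  (b, 100) -> RepeatVector(20) -> (b, 20, 100)
# concat:  (b, 20, 200)  repeated state concatenated with a
# e:       (b, 20, 1)    Dense(1) energy score per timestep
# alphas:  (b, 20, 1)    softmax over the time axis (axis=1)
# context: (b, 1, 100)   Dot(axes=1)([alphas, a]), the weighted sum of a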
def seq2seq_attention(feature_len=1, after_day=1, input_shape=(20, 1), time_step=20):
    # Model input with shape (time_step, feature_len)
    X = Input(shape=input_shape)
    # Initial states of the decoder LSTM
    s0 = Input(shape=(100,), name='s0')
    c0 = Input(shape=(100,), name='c0')
    # s: decoder hidden state, carried over between decoding steps
    s = s0
    # c: decoder cell state, carried over between decoding steps
    c = c0
    # Collect the per-step predictions here
    all_outputs = []
    # Encoder: pre-attention LSTM
    encoder = LSTM(units=100, return_state=False, return_sequences=True, name='encoder')
    # Decoder: post-attention LSTM
    decoder = LSTM(units=100, return_state=True, name='decoder')
    # Output projection
    decoder_output = Dense(units=feature_len, activation='linear', name='output')
    model_output = Reshape((1, feature_len))
    # Attention layers, shared across all decoding steps
    repeator = RepeatVector(time_step)
    concatenator = Concatenate(axis=-1)
    densor = Dense(1, activation='relu')
    activator = Activation(softmax, name='attention_weights')
    dotor = Dot(axes=1)
    encoder_outputs = encoder(X)
    for t in range(after_day):
        context = one_step_attention(encoder_outputs, s, repeator, concatenator, densor, activator, dotor)
        a, s, c = decoder(context, initial_state=[s, c])
        outputs = decoder_output(a)
        outputs = model_output(outputs)
        all_outputs.append(outputs)
    all_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
    model = Model(inputs=[X, s0, c0], outputs=all_outputs)
    return model
# Shape of each input sample
input_shape = (time_step, feature_len)
model = seq2seq_attention(feature_len, after_day, input_shape, time_step)
model.compile(loss=mean_squared_error, optimizer='adam')
model.summary()
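Before training, a quick forward pass on zeros confirms the output shape (a sketch; any batch size works):

dummy_x = np.zeros((2, time_step, feature_len))
dummy_s = np.zeros((2, 100))
dummy_c = np.zeros((2, 100))
print(model.predict([dummy_x, dummy_s, dummy_c]).shape)  # expected: (2, after_day, feature_len)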
4. Training and Evaluating the Model
batch_size = 64
epochs = 300
def save_model(model, model_name):
    file_path = 'model_test/{}.h5'.format(model_name)
    if not os.path.exists(os.path.dirname(file_path)):
        try:
            os.makedirs(os.path.dirname(file_path))
        except OSError as exc:  # guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    model.save(file_path)
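Because the model uses a custom loss and a custom softmax activation, reloading the saved .h5 later needs those objects passed back in (a sketch of the reload, assuming the definitions above are importable at load time):

from keras.models import load_model
reloaded = load_model('model_test/model23.h5',
                      custom_objects={'mean_squared_error': mean_squared_error,
                                      'softmax': softmax})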
# Zero initial states for the decoder LSTM
s0_train = np.zeros((x_train.shape[0], 100))
c0_train = np.zeros((x_train.shape[0], 100))
s0_validate = np.zeros((x_validate.shape[0], 100))
c0_validate = np.zeros((x_validate.shape[0], 100))
s0_test = np.zeros((x_test.shape[0], 100))
c0_test = np.zeros((x_test.shape[0], 100))
# Train the model
history = model.fit(
    [x_train, s0_train, c0_train], y_train,
    batch_size=batch_size, epochs=epochs,
    validation_data=([x_validate, s0_validate, c0_validate], y_validate))
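With epochs set to 300 the model can overfit the training windows; one optional refinement is to stop early on the validation loss (a sketch using Keras's EarlyStopping, not part of the original training loop):

from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
history = model.fit(
    [x_train, s0_train, c0_train], y_train,
    batch_size=batch_size, epochs=epochs,
    validation_data=([x_validate, s0_validate, c0_validate], y_validate),
    callbacks=[early_stop])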
print('-' * 100)
train_score = model.evaluate([x_train, s0_train, c0_train], y_train, batch_size=batch_size, verbose=0)
print('Train Score: %.8f MSE (%.8f RMSE)' % (train_score, math.sqrt(train_score)))
validate_score = model.evaluate([x_validate, s0_validate, c0_validate], y_validate, batch_size=batch_size, verbose=0)
print('Validate Score: %.8f MSE (%.8f RMSE)' % (validate_score, math.sqrt(validate_score)))
model_class_name = 'model23'
save_model(model, model_name=model_class_name)
5. Prediction
train_predict = model.predict([x_train, s0_train, c0_train])
validate_predict = model.predict([x_validate, s0_validate, c0_validate])
test_predict = model.predict([x_test, s0_test, c0_test])  # forecast for the final window (still normalized)
# Inverse transform: map the normalized values back to the original scale
def inverse_normalize_data(data, scaler):
    for i in range(len(data)):
        data[i] = scaler.inverse_transform(data[i])
    return data
train_predict = inverse_normalize_data(train_predict, scaler)
y_train = inverse_normalize_data(y_train, scaler)
validate_predict = inverse_normalize_data(validate_predict, scaler)
y_validate = inverse_normalize_data(y_validate, scaler)
test_predict = inverse_normalize_data(test_predict, scaler)
print(test_predict)  # forecast for the next 6 days
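test_predict has shape (1, 6, 5): one window, 6 forecast days, 5 columns. For example, to read off column 3 (the column the custom loss targets) for each forecast day:

for day, row in enumerate(test_predict[0], start=1):
    print('day +%d: %.4f' % (day, row[3]))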
6. Complete Code
import csv
import numpy as np
from keras import backend as K
from keras.models import Model
from keras.layers import LSTM, Dense, Activation, Lambda, RepeatVector, Input, Reshape, Concatenate, Dot
from sklearn.preprocessing import MinMaxScaler
import os
import errno
import math
def file_processing(file_path, encode=None):
    data = []
    with open(file_path, encoding=encode) as file:
        rows = csv.reader(file, delimiter=",")
        n_row = 0
        for row in rows:
            if n_row != 0:  # skip the header row
                # column -> 0: code, 1: date; columns 2+ are numeric features
                for column in range(2, len(row)):
                    data[n_row - 1].append(float(row[column].strip()))
            data.append([])
            n_row += 1
    # the loop leaves one trailing empty list; drop it
    del data[-1]
    return np.array(data)
# Min-max normalization (fitted on the full series; see the note in section 2)
def normalize_data(data, scaler, feature_len):
    minmaxscaler = scaler.fit(data)
    normalize_data = minmaxscaler.transform(data)
    return normalize_data
# Slice the series into (time_step input, after_day target) samples;
# validate_percent is the fraction assigned to training
def load_data(data, time_step=20, after_day=1, validate_percent=0.67):
    seq_length = time_step + after_day
    result = []
    for index in range(len(data) - seq_length + 1):
        result.append(data[index: index + seq_length])
    result = np.array(result)
    print('total data: ', result.shape)
    train_size = int(len(result) * validate_percent)
    train = result[:train_size, :]
    validate = result[train_size:, :]
    x_train = train[:, :time_step]
    y_train = train[:, time_step:]
    x_validate = validate[:, :time_step]
    y_validate = validate[:, time_step:]
    return [x_train, y_train, x_validate, y_validate]
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x: Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
# Custom loss: only feature column 3 contributes to the training objective
def mean_squared_error(y_true, y_pred):
    return K.mean(K.square(y_pred[:, :, 3] - y_true[:, :, 3]), axis=-1)
def one_step_attention(a, s_prev, repeator, concatenator, densor, activator, dotor):
    # Repeat s_prev once per encoder timestep
    s_prev = repeator(s_prev)
    # Concatenate the encoder hidden states with s_prev
    concat = concatenator([s_prev, a])
    e = densor(concat)
    alphas = activator(e)
    # Weighted sum of the encoder states gives the context vector
    context = dotor([alphas, a])
    return context
def seq2seq_attention(feature_len=1, after_day=1, input_shape=(20, 1), time_step=20):
    # Model input with shape (time_step, feature_len)
    X = Input(shape=input_shape)
    # Initial states of the decoder LSTM
    s0 = Input(shape=(100,), name='s0')
    c0 = Input(shape=(100,), name='c0')
    # s: decoder hidden state, carried over between decoding steps
    s = s0
    # c: decoder cell state, carried over between decoding steps
    c = c0
    # Collect the per-step predictions here
    all_outputs = []
    # Encoder: pre-attention LSTM
    encoder = LSTM(units=100, return_state=False, return_sequences=True, name='encoder')
    # Decoder: post-attention LSTM
    decoder = LSTM(units=100, return_state=True, name='decoder')
    # Output projection
    decoder_output = Dense(units=feature_len, activation='linear', name='output')
    model_output = Reshape((1, feature_len))
    # Attention layers, shared across all decoding steps
    repeator = RepeatVector(time_step)
    concatenator = Concatenate(axis=-1)
    densor = Dense(1, activation='relu')
    activator = Activation(softmax, name='attention_weights')
    dotor = Dot(axes=1)
    encoder_outputs = encoder(X)
    for t in range(after_day):
        context = one_step_attention(encoder_outputs, s, repeator, concatenator, densor, activator, dotor)
        a, s, c = decoder(context, initial_state=[s, c])
        outputs = decoder_output(a)
        outputs = model_output(outputs)
        all_outputs.append(outputs)
    all_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
    model = Model(inputs=[X, s0, c0], outputs=all_outputs)
    return model
def save_model(model, model_name):
    file_path = 'model_test/{}.h5'.format(model_name)
    if not os.path.exists(os.path.dirname(file_path)):
        try:
            os.makedirs(os.path.dirname(file_path))
        except OSError as exc:  # guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    model.save(file_path)
# Inverse transform: map the normalized values back to the original scale
def inverse_normalize_data(data, scaler):
    for i in range(len(data)):
        data[i] = scaler.inverse_transform(data[i])
    return data
if __name__ == '__main__':
    data = file_processing('20180504_50.csv')
    time_step = 20               # each input sample covers 20 consecutive timesteps
    feature_len = data.shape[1]  # number of feature columns to predict
    scaler = MinMaxScaler(feature_range=(0, 1))  # min-max normalization
    validate_percent = 0.8       # training fraction; the train/validation split is 8:2
    after_day = 6                # forecast horizon: the next 6 days
    # Normalize the data
    data = normalize_data(data, scaler, feature_len)
    # Use the last time_step rows of history as the test input
    x_test = data[-time_step:]
    x_test = np.reshape(x_test, (1, x_test.shape[0], x_test.shape[1]))
    # Build the training and validation sets
    x_train, y_train, x_validate, y_validate = load_data(
        data, time_step=time_step, after_day=after_day, validate_percent=validate_percent)
    # 20 input timesteps per sample, 6 target timesteps, 5 columns; the sliding
    # window yields 1025 training samples:
    # x_train.shape, y_train.shape: (1025, 20, 5) (1025, 6, 5)
    # x_validate.shape, y_validate.shape: (257, 20, 5) (257, 6, 5)
    # Shape of each input sample
    input_shape = (time_step, feature_len)
    model = seq2seq_attention(feature_len, after_day, input_shape, time_step)
    model.compile(loss=mean_squared_error, optimizer='adam')
    model.summary()
    batch_size = 64
    epochs = 300
    # Zero initial states for the decoder LSTM
    s0_train = np.zeros((x_train.shape[0], 100))
    c0_train = np.zeros((x_train.shape[0], 100))
    s0_validate = np.zeros((x_validate.shape[0], 100))
    c0_validate = np.zeros((x_validate.shape[0], 100))
    s0_test = np.zeros((x_test.shape[0], 100))
    c0_test = np.zeros((x_test.shape[0], 100))
    history = model.fit(
        [x_train, s0_train, c0_train], y_train,
        batch_size=batch_size, epochs=epochs,
        validation_data=([x_validate, s0_validate, c0_validate], y_validate))
    print('-' * 100)
    train_score = model.evaluate([x_train, s0_train, c0_train], y_train, batch_size=batch_size, verbose=0)
    print('Train Score: %.8f MSE (%.8f RMSE)' % (train_score, math.sqrt(train_score)))
    validate_score = model.evaluate([x_validate, s0_validate, c0_validate], y_validate, batch_size=batch_size, verbose=0)
    print('Validate Score: %.8f MSE (%.8f RMSE)' % (validate_score, math.sqrt(validate_score)))
    model_class_name = 'model23'
    save_model(model, model_name=model_class_name)
    train_predict = model.predict([x_train, s0_train, c0_train])
    validate_predict = model.predict([x_validate, s0_validate, c0_validate])
    test_predict = model.predict([x_test, s0_test, c0_test])  # forecast for the final window (still normalized)
    train_predict = inverse_normalize_data(train_predict, scaler)
    y_train = inverse_normalize_data(y_train, scaler)
    validate_predict = inverse_normalize_data(validate_predict, scaler)
    y_validate = inverse_normalize_data(y_validate, scaler)
    test_predict = inverse_normalize_data(test_predict, scaler)
    print(test_predict)  # forecast for the next 6 days
7. Summary
A seq2seq (sequence-to-sequence) model generally consists of two parts: an encoder and a decoder. The encoder digests the input sequence into an internal representation, and the decoder unrolls that representation into an output sequence, which allows the input and output sequences to have different lengths. Attention feeds the encoder's lower-level features directly to later layers of the network, which mitigates the loss of detail and, to some extent, the vanishing-gradient problem.