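# Development of a wind-turbine early-warning model based on random-forest
# regression prediction (original title: 基于随机森林回归预测的风机预警模型开发)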
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
# Turn on matplotlib's interactive mode before any figure is created.
plt.ion()

# --- Prepare data -----------------------------------------------------------
# Feature columns fed to the model.
inputLabel = [
    "WindSpeed",
    "GenPower",
    "GenSpeed",
    "OutDoorTemp",
    "NacelleTemp",
]
# Target columns the model predicts (temperature channels, judging by their
# names). Their order fixes the column indices used throughout the script.
outputLabel = [
    "GenBearDETemp",
    "GenBearNDETemp",
    "GenStatorUTemp",
    "GenStatorVTemp",
    "GenStatorWTemp",
]

# One CSV is used for fitting, a second one for evaluation.
dataPath = "E:/DevRawModelProject/RawModelData/test/goodt20014001f.csv"
data_testPath = "E:/DevRawModelProject/RawModelData/test/t20014001_test.csv"
df = pd.read_csv(dataPath)
df_test = pd.read_csv(data_testPath)

# Extract features and targets as plain NumPy arrays; column order follows
# inputLabel / outputLabel.
X_train = df[inputLabel].values
y_train = df[outputLabel].values
X_test = df_test[inputLabel].values
y_test = df_test[outputLabel].values
# --- Fit the model ----------------------------------------------------------
# Depth limit applied to every tree in the forest.
max_depth = 8

# NOTE: no random_state is passed (a `random_state=2` setting is commented out
# in the original), so each run fits a different forest and the scores recorded
# further down are not exactly reproducible.
regr_rf = RandomForestRegressor(max_depth=max_depth)

# y_train holds one column per entry of outputLabel, so a single forest is
# fitted to all targets at once.
regr_rf.fit(X_train, y_train)

# Output recorded for the fit call (apparently pasted from an interactive
# session). Kept as a comment: left as code it would build and discard a second
# estimator, and several of these arguments (criterion='mse',
# min_impurity_split, max_features='auto') are rejected by newer scikit-learn.
#
#   RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=8,
#       max_features='auto', max_leaf_nodes=None,
#       min_impurity_split=1e-07, min_samples_leaf=1,
#       min_samples_split=2, min_weight_fraction_leaf=0.0,
#       n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
#       verbose=0, warm_start=False)
# --- Evaluate the fitted model ----------------------------------------------
# Predictions for the training inputs and for the separate test inputs.
y_rf = regr_rf.predict(X_train)
y_test_rf = regr_rf.predict(X_test)

# Variance-based goodness of fit on the TRAINING data for target column 2
# (outputLabel[2]):  1 - Var(actual - predicted) / Var(actual).
train_target = y_train[:, 2]
varR = np.var(train_target - y_rf[:, 2])
varT = np.var(train_target)
r2 = 1 - varR / varT
print(r2)
# Output recorded for this print (apparently from an interactive session):
#   0.820108692169
# Test set, target column 0: measured series (default colour) with the model's
# prediction drawn over it in red.
plt.plot(y_test[:, 0])
plt.plot(y_test_rf[:, 0], 'r')

# Shape of the training predictions: (rows, number of targets).
print(y_rf.shape)
# Output recorded for this print (apparently from an interactive session):
#   (27627, 5)

# Training set, measured (x axis) against predicted (y axis):
# target column 0 as blue dots, target column 1 as red crosses.
plt.scatter(y_train[:, 0], y_rf[:, 0], c="cornflowerblue", alpha=0.5, label="predict")
plt.scatter(y_train[:, 1], y_rf[:, 1], marker="+", c="red", alpha=0.3, label="predict")
# Output recorded for the last call (kept as a comment; as code it is a
# syntax error):
#   <matplotlib.collections.PathCollection at 0xeffacf8>
# Axes3D is not referenced by name below; older matplotlib releases need this
# import for projection='3d' to be available.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Both point clouds share the same horizontal axes: input columns 0 and 1
# (inputLabel[0] and inputLabel[1]).
X = X_train[:, 0]
Y = X_train[:, 1]

# Measured values of target column 2 (default marker) ...
Z = y_train[:, 2]
ax.scatter(X, Y, Z)

# ... overlaid with the model's predictions for the same column (red crosses).
Z = y_rf[:, 2]
ax.scatter(X, Y, Z, c="red", marker="+", alpha=0.5, label="Data")
plt.show()

# Target column 0 against target column 1 on the training set:
# measured pairs in blue, predicted pairs in red.
plt.scatter(y_train[:, 0], y_train[:, 1], c="cornflowerblue", marker="+", alpha=0.5, label="predict")
plt.scatter(y_rf[:, 0], y_rf[:, 1], marker="+", c="red", alpha=0.3, label="predict")
# Output recorded for the last call (kept as a comment; as code it is a
# syntax error):
#   <matplotlib.collections.PathCollection at 0xd7a1a58>
# Inspect the columns of the training frame. As a bare expression this only
# displays a value in an interactive session; in a plain script it has no
# visible effect.
df.columns
# Output recorded for this expression (kept as a comment; as code it would call
# the undefined name `Index`):
#   Index(['Unnamed: 0', 'TablePart', 'TurbineID', 'real_time', 'WindSpeed',
#          'GenPower', 'GenSpeed', 'OutDoorTemp', 'NacelleTemp', 'GenBearDETemp',
#          'GenBearNDETemp', 'GenStatorUTemp', 'GenStatorVTemp', 'GenStatorWTemp',
#          'con1'],
#         dtype='object')
# Training set: measured series followed by the predicted series, first for
# target column 0 and then for target column 1.
for target_idx in (0, 1):
    plt.plot(y_train[:, target_idx])
    plt.plot(y_rf[:, target_idx])
# Outputs recorded for the two original plot pairs (kept as comments; as code
# they are syntax errors):
#   [<matplotlib.lines.Line2D at 0xb97ee80>]
#   [<matplotlib.lines.Line2D at 0xf07def0>]
# Training residuals (measured minus predicted) for target columns 1, 0 and 2,
# drawn in the same order as the original three plot calls.
for target_idx in (1, 0, 2):
    plt.plot(y_train[:, target_idx] - y_rf[:, target_idx])
# Outputs recorded for the three original calls (kept as comments; as code
# they are syntax errors):
#   [<matplotlib.lines.Line2D at 0xf128dd8>]
#   [<matplotlib.lines.Line2D at 0xf18dd30>]
#   [<matplotlib.lines.Line2D at 0xf1db080>]
# Training residual of target column 2 plotted against input column 2
# (inputLabel[2]), one '*' marker per sample.
residual_train = y_train[:, 2] - y_rf[:, 2]
plt.plot(X_train[:, 2], residual_train, '*')
plt.show()

# Variance-based goodness of fit on the training data for target column 2:
# 1 - Var(residual) / Var(actual). numpy is already imported at the top of the
# file, so the duplicate mid-script import was dropped.
varR = np.var(residual_train)
varT = np.var(y_train[:, 2])
r2 = 1 - varR / varT
print(r2)
# Output recorded for this print (apparently from an interactive session):
#   0.962437941174
# NOTE(review): this differs from the 0.82 recorded for the same formula
# earlier in the file, presumably because the two values come from different
# unseeded fits - confirm.
# Predict on the test inputs (recomputed here so this section does not depend
# on the earlier evaluation step having been run).
y_test_rf = regr_rf.predict(X_test)

# Test residual of target column 2 against input column 2 (inputLabel[2]).
residual_test = y_test[:, 2] - y_test_rf[:, 2]
plt.plot(X_test[:, 2], residual_test, '*')
plt.show()

# Test set, target column 0: measured (x axis) against predicted (y axis).
plt.plot(y_test[:, 0], y_test_rf[:, 0], '*')
plt.show()

# Variance-based goodness of fit on the TEST data for target column 2:
# 1 - Var(residual) / Var(actual). numpy is already imported at the top of the
# file, so the duplicate mid-script import was dropped.
varR = np.var(residual_test)
varT = np.var(y_test[:, 2])
r2 = 1 - varR / varT
print(r2)
# Output recorded for this print (apparently from an interactive session):
#   0.703730968599
# Smallest wind speed in the training data. As a bare expression the value is
# only displayed in an interactive session.
df.WindSpeed.min()
# Output recorded for this expression:
#   1.02

# GenPower against WindSpeed: training rows as "+" markers, test rows as "o".
plt.plot(df['WindSpeed'], df['GenPower'], "+")
plt.plot(df_test['WindSpeed'], df_test['GenPower'], "o")
# Output recorded for the last call (kept as a comment; as code it is a
# syntax error):
#   [<matplotlib.lines.Line2D at 0xf4e4470>]
# Test residuals (measured minus predicted) for target columns 0 and 1.
plt.plot(y_test[:, 0] - y_test_rf[:, 0])
plt.plot(y_test[:, 1] - y_test_rf[:, 1])
# Output recorded for the last call (kept as a comment; as code it is a
# syntax error):
#   [<matplotlib.lines.Line2D at 0xfd5e208>]

# Test set, target column 0: measured series with the prediction in red.
plt.plot(y_test[:, 0])
plt.plot(y_test_rf[:, 0], 'r')
# Output recorded for the last call (kept as a comment; as code it is a
# syntax error):
#   [<matplotlib.lines.Line2D at 0xfd2a6d8>]
# Training residuals of target column 2 (x axis) against those of target
# column 3 (y axis), one '*' marker per sample.
resid_col2 = y_train[:, 2] - y_rf[:, 2]
resid_col3 = y_train[:, 3] - y_rf[:, 3]
plt.plot(resid_col2, resid_col3, '*')
plt.show()