1.读取Iris数据集细节资料
# Load the Iris dataset that ships with scikit-learn and report the shape of
# its feature matrix.
from sklearn.datasets import load_iris

iris = load_iris()
# The parenthesised single-argument form prints exactly what `print x` did on
# Python 2 and also parses on Python 3, where the bare statement is a
# SyntaxError.
print(iris.data.shape)
查看数据说明
# Show the dataset's descriptive text. Written as a call so the line is valid
# on both Python 2 and Python 3 (output is unchanged on Python 2).
print(iris.DESCR)
2.对Iris数据集进行分割
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`. Fall back to
# the old location only for installations that predate the move.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

# Hold out 25% of the samples for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25, random_state=33
)
3.使用K近邻分类器对Iris数据进行类型预测
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences them.
ss = StandardScaler().fit(x_train)
x_train = ss.transform(x_train)
x_test = ss.transform(x_test)

# Train a k-nearest-neighbours classifier with its default hyperparameters
# (fit() returns the estimator itself) and predict labels for the test split.
knc = KNeighborsClassifier().fit(x_train, y_train)
y_predict = knc.predict(x_test)
4.对K近邻分类器在Iris数据上的预测性能进行评估
from sklearn.metrics import classification_report

# Mean accuracy of the fitted classifier on the held-out test split.
# The original Python 2 statement `print "accuracy is: ", value` emitted the
# label's trailing space plus print's own separator, i.e. two spaces before
# the number; the format string reproduces that while also being valid on
# Python 3, where the print statement is a SyntaxError.
print("accuracy is:  %s" % knc.score(x_test, y_test))

# Per-class precision / recall / F1 / support, labelled with the species names.
print(classification_report(y_test, y_predict, target_names=iris.target_names))
5.输出
accuracy is: 0.894736842105
             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00         8
 versicolor       0.73      1.00      0.85        11
  virginica       1.00      0.79      0.88        19

avg / total       0.92      0.89      0.90        38