利用sklearn KNN实现手写数字识别
```
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
```
```
def imgTovector(filename):
    """Load an image file and flatten it into a single-row array.

    Args:
        filename: Path of the image; it is handed to
            ``matplotlib.pyplot.imread`` unchanged.

    Returns:
        A 2-D NumPy array of shape ``(1, N)``, where ``N`` is the total
        number of values in the decoded image. The pixel values are
        returned as read; no scaling is applied here.
    """
    pixels = np.array(plt.imread(filename))
    # One row, as many columns as needed to hold every pixel value.
    return pixels.reshape(1, -1)
```
```
from os import listdir
def collectFiledataset(root='knn_num_data'):
    """Build a DataFrame of flattened images and their labels.

    The data directory is expected to contain one sub-directory per group
    of images. Each image file name must start with its integer class
    label followed by an underscore, e.g. ``3_17.png`` -> label ``3``.

    Args:
        root: Directory that holds the per-class sub-directories.
            Defaults to ``'knn_num_data'``, the location used originally.

    Returns:
        A ``pandas.DataFrame`` with one row per image. The leading columns
        (``0 .. N-1``) hold the flattened pixel values as ``float64`` and
        the last column, ``'img_labels'``, holds the integer class label.

    Raises:
        ValueError: If a file name does not start with an integer label,
            or if the images do not all flatten to the same length.
    """
    n_pixels = 28 * 28  # column count used when no image is found at all
    img_labels = []
    rows = []

    # Sorted listings make the row order reproducible across platforms.
    for class_dir in sorted(listdir(root)):
        class_path = root + '/' + class_dir
        for file_name in sorted(listdir(class_path)):
            # "<label>_<anything>.<ext>" -> integer label
            stem = file_name.split('.')[0]
            img_labels.append(int(stem.split('_')[0]))
            rows.append(imgTovector(class_path + '/' + file_name)[0])

    # Rows are appended in lock-step with the labels, so row k always
    # belongs to label k. The earlier index formula ``i * (t - 1) + j``
    # made consecutive classes overlap by one row and left trailing rows
    # filled with zeros, which misaligned features and labels.
    if rows:
        data = np.vstack(rows).astype(np.float64)
    else:
        data = np.zeros((0, n_pixels))

    dataSet = pd.DataFrame(data)
    dataSet['img_labels'] = img_labels
    return dataSet
```
```
# Load every image as one row of pixel features plus an 'img_labels' column.
dataSet = collectFiledataset()
df = pd.DataFrame(dataSet)

# Pixel columns only (everything except the trailing 'img_labels' column).
pixels = df.iloc[:, :-1]

# Drop constant rows (images whose pixels are all identical) and constant
# columns (pixels that never change); neither carries distance information.
df_r = pixels.std(axis=1) != 0
df_c = pixels.std(axis=0) != 0
l = df.img_labels
labels = l.loc[df_r.values]
# The column mask has one entry per pixel column, so it must be applied to
# the pixel frame; applying it to `df` (which also holds the label column)
# is a boolean-length mismatch.
img_df = pixels.loc[df_r.values, df_c.values]
# Removing rows can turn further columns constant; filter once more.
img_ = img_df.loc[:, img_df.std() != 0]

# Scale every remaining pixel feature with MinMaxScaler (default range [0, 1]).
# NOTE(review): the scaler is fit on the full data set before the split, so
# test-set statistics leak into the scaling; consider fitting on X_train only.
MM = MinMaxScaler()
img_union = MM.fit_transform(img_)
# `feature` was previously used without ever being defined (NameError).
feature = img_union

X_train, X_test, y_train, y_test = train_test_split(feature, labels, test_size=.3)
kd_tree = KNeighborsClassifier(algorithm='kd_tree', n_neighbors=12, p=2, n_jobs=2)
# Optional hyper-parameter search over n_neighbors (disabled):
# param_grid = dict(n_neighbors=np.arange(5,100))
# gc = GridSearchCV(kd_tree,param_grid,cv=4,n_jobs=2).fit(feature,labels)
```
```
# Train the classifier on the training split.
kd_tree.fit(X_train, y_train)
# Mean accuracy as a (train, test) pair; as the last expression of the
# cell it is displayed by the notebook.
tuple(kd_tree.score(X, y) for X, y in ((X_train, y_train), (X_test, y_test)))
```