1.python机器学习库
scikit-learn 机器学习库
SciPy:科学和工程计算工具
Python Imaging Library(PIL):Python图像处理库
NumPy:支持高维数组与大规模矩阵运算
Matplotlib:一个Python的图形框架,用来绘制图表
2.作图工具,画树状图
Graphviz - Graph Visualization Software
3.一个预测是否买电脑的例子
csv文件
RID,age,income,student,credit_rating,class_computer
1,youth,high,no,fair,no
2,youth,high,no,excellent,no
3,middel_aged,high,no,fair,yes
4,senior,medium,no,fair,yes
5,senior,low,yes,fair,yes
6,middel_aged,low,yes,excellent,no
7,middel_aged,low,yes,excellent,yes
8,youth,medium,no,fair,no
9,youth,low,yes,fair,yes
10,senior,medium,yes,fair,yes
11,youth,medium,yes,excellent,yes
12,middel_aged,medium,no,excellent,yes
13,middel_aged,high,yes,fair,yes
14,senior,medium,no,excellent,no
代码实现
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
from sklearn.externals.six import StringIO
from matplotlib.pyplot import clf
# Train a decision tree on the "buys a computer" sample data and predict the
# class of one modified sample.

# Load the training data; the first CSV row holds the column names.
# The raw string keeps the backslash of the Windows path literal, and the
# with-block closes the file as soon as every row has been read.
with open(r"D:\data2.csv", "r") as allElectronicsData:
    reader = csv.reader(allElectronicsData)
    headers = next(reader)
    # print(headers)

    featureList = []
    labelList = []
    for row in reader:
        # csv.reader yields [] for blank lines (e.g. a trailing newline).
        if not row:
            continue
        # The last column is the class label.
        labelList.append(row[-1])
        # Every column between the first one (RID in the sample data) and
        # the label is a feature, stored as {column name: value}.
        rowDict = {}
        for i in range(1, len(row) - 1):
            rowDict[headers[i]] = row[i]
        featureList.append(rowDict)

# featureList is a list of dicts, e.g.
# [{'age': 'youth', 'income': 'high', 'credit_rating': 'fair', 'student': 'no'}]
# print(featureList)

# Convert the categorical feature dicts into a 0/1 matrix.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print('dumyx: '+str(dummyX))
# NOTE(review): newer scikit-learn releases rename this method to
# get_feature_names_out(); confirm against the installed version.
print(vec.get_feature_names())

# Label list
# print("labelList: "+ str(labelList))

# Convert the class labels into 0/1 form.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("dummyY: "+ str(dummyY))

# Split on entropy (information gain), the measure used by ID3.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf" + str(clf))

# To draw the tree, export it in Graphviz format and convert it, e.g.:
#   with open("allEle.dot", 'w') as f:
#       tree.export_graphviz(clf, feature_names=vec.get_feature_names(), out_file=f)
#   dot -Tpdf allEle.dot -o allEle.pdf

oneRowX = dummyX[0, :]
# print("oneRow: " + str(oneRowX))

# Prediction: start from a copy of the first sample so that editing it does
# not modify the training matrix dummyX (dummyX[0, :] is a view).
newRowX = oneRowX.copy()
newRowX[0] = 1
newRowX[1] = 0
newRowX[2] = 0
print("newRowX: " + str(newRowX))
# predict() expects a 2-D (n_samples, n_features) array, so present the
# single sample as one row.
predictedY = clf.predict(newRowX.reshape(1, -1))
print("predicted: " + str(predictedY))