ML汇总

常用的scikit-learn分类器

# -*- coding: utf-8 -*-
"""
Created on Fri Jul 29 21:51:11 2016

Usage examples for eight classic machine learning classifiers from scikit-learn.

@author: ckawyh
"""


import sys
import time
from sklearn import metrics
import numpy as np
import cPickle as pickle

# Python 2-only workaround: force the interpreter-wide default string encoding
# to UTF-8. ``reload`` is not a builtin on Python 3 and ``sys`` has no
# ``setdefaultencoding`` there, so the hack is skipped on newer interpreters
# instead of raising NameError at import time.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')

# Multinomial Naive Bayes Classifier
def naive_bayes_classifier(train_x, train_y):
    """Train a multinomial naive Bayes classifier.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``MultinomialNB`` estimator (created with ``alpha=0.01``).
    """
    from sklearn.naive_bayes import MultinomialNB

    # scikit-learn's ``fit`` returns the estimator itself, so the fitted
    # model can be returned directly from the call chain.
    return MultinomialNB(alpha=0.01).fit(train_x, train_y)


# KNN Classifier
def knn_classifier(train_x, train_y):
    """Train a k-nearest-neighbours classifier with library defaults.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``KNeighborsClassifier`` estimator.
    """
    from sklearn import neighbors

    clf = neighbors.KNeighborsClassifier()
    clf.fit(train_x, train_y)
    return clf


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Train a logistic regression classifier with an L2 penalty.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    from sklearn import linear_model

    estimator = linear_model.LogisticRegression(penalty='l2')
    # ``fit`` returns the estimator itself.
    return estimator.fit(train_x, train_y)


# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Train a random forest classifier made of 8 trees.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``RandomForestClassifier`` estimator.
    """
    from sklearn import ensemble

    forest = ensemble.RandomForestClassifier(n_estimators=8)
    forest.fit(train_x, train_y)
    return forest


# Decision Tree Classifier
def decision_tree_classifier(train_x, train_y):
    """Train a decision tree classifier with library defaults.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``DecisionTreeClassifier`` estimator.
    """
    from sklearn.tree import DecisionTreeClassifier

    # ``fit`` returns the estimator itself, so no temporary is needed.
    return DecisionTreeClassifier().fit(train_x, train_y)


# GBDT(Gradient Boosting Decision Tree) Classifier
def gradient_boosting_classifier(train_x, train_y):
    """Train a gradient boosting (GBDT) classifier with 200 estimators.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``GradientBoostingClassifier`` estimator.
    """
    from sklearn import ensemble

    booster = ensemble.GradientBoostingClassifier(n_estimators=200)
    # ``fit`` returns the estimator itself.
    return booster.fit(train_x, train_y)


# SVM Classifier
def svm_classifier(train_x, train_y):
    """Train an RBF-kernel support vector classifier.

    The estimator is created with ``probability=True``.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``SVC`` estimator.
    """
    from sklearn import svm

    svc = svm.SVC(kernel='rbf', probability=True)
    svc.fit(train_x, train_y)
    return svc

# SVM Classifier using cross validation
def svm_cross_validation(train_x, train_y):
    """Grid-search C and gamma for an RBF SVC, then train with the best pair.

    A ``GridSearchCV`` run over a fixed grid of ``C`` and ``gamma`` values
    selects the best parameters; every parameter of the best estimator is
    printed, and a fresh ``SVC`` built from the winning ``C``/``gamma`` is
    fitted on the full training data.

    Args:
        train_x: training features, handed unchanged to ``fit``.
        train_y: training labels, handed unchanged to ``fit``.

    Returns:
        The fitted ``SVC`` estimator (``kernel='rbf'``, ``probability=True``)
        using the selected ``C`` and ``gamma``.
    """
    try:
        # ``sklearn.grid_search`` was deprecated in scikit-learn 0.18 and
        # removed in 0.20; ``model_selection`` is its replacement.
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        # Fallback for scikit-learn releases older than 0.18.
        from sklearn.grid_search import GridSearchCV
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    param_grid = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    }
    grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
    grid_search.fit(train_x, train_y)

    best_parameters = grid_search.best_estimator_.get_params()
    for para, val in best_parameters.items():
        # One pre-formatted argument prints "name value" identically under
        # the Python 2 print statement and the Python 3 print function.
        print('%s %s' % (para, val))

    model = SVC(kernel='rbf', C=best_parameters['C'],
                gamma=best_parameters['gamma'], probability=True)
    model.fit(train_x, train_y)
    return model

    
if __name__ == '__main__':
    # Demo driver: train every classifier defined in this file on the iris
    # data set and print timing, accuracy and a classification report.
    from sklearn.datasets import load_iris
    from pandas import DataFrame
    try:
        # ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18
        # and removed in 0.20; ``model_selection`` is its replacement.
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Fallback for scikit-learn releases older than 0.18.
        from sklearn.cross_validation import train_test_split

    data_dict = load_iris()
    data = data_dict.data
    label = data_dict.target

    # Append the label as column 4 so features and labels are split together.
    df = DataFrame(data)
    df[4] = label
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    data_array = df.values
    split_train, split_cv = train_test_split(
        data_array, test_size=0.3, random_state=0)

    # Columns 0-3 are the features, column 4 is the label added above.
    train_x = split_train[:, 0:4]
    train_y = split_train[:, 4]
    test_x = split_cv[:, 0:4]
    test_y = split_cv[:, 4]

    # Set to a file path to pickle all trained models; None disables saving.
    model_save_file = None
    model_save = {}
    test_classifiers = ['NB', 'KNN', 'LR', 'RF', 'DT', 'SVM', 'SVMCV', 'GBDT']
    classifiers = {
        'NB': naive_bayes_classifier,
        'KNN': knn_classifier,
        'LR': logistic_regression_classifier,
        'RF': random_forest_classifier,
        'DT': decision_tree_classifier,
        'SVM': svm_classifier,
        'SVMCV': svm_cross_validation,
        'GBDT': gradient_boosting_classifier,
    }
    num_train, num_feat = train_x.shape
    num_test, num_feat = test_x.shape
    # Every print below passes one pre-formatted string so the output is the
    # same under the Python 2 print statement and the Python 3 function.
    print('******************** Data Info *********************')
    print('#training data: %d, #testing_data: %d, dimension: %d'
          % (num_train, num_test, num_feat))

    for classifier in test_classifiers:
        print('******************* %s ********************' % classifier)
        start_time = time.time()
        model = classifiers[classifier](train_x, train_y)
        print('training took %fs!' % (time.time() - start_time))
        predict = model.predict(test_x)
        if model_save_file is not None:
            model_save[classifier] = model
        accuracy = metrics.accuracy_score(test_y, predict)
        report = metrics.classification_report(test_y, predict)
        print('accuracy: %.2f%%' % (100 * accuracy))
        print(report)

    if model_save_file is not None:
        # Context manager guarantees the file is flushed and closed.
        with open(model_save_file, 'wb') as model_file:
            pickle.dump(model_save, model_file)
运行结果:
******************** Data Info *********************
#training data: 105, #testing_data: 45, dimension: 4
******************* NB ********************
training took 0.001000s!
accuracy: 60.00%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       0.00      0.00      0.00        18
        2.0       0.38      1.00      0.55        11

avg / total       0.45      0.60      0.49        45

******************* KNN ********************
training took 0.000000s!
accuracy: 97.78%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       1.00      0.94      0.97        18
        2.0       0.92      1.00      0.96        11

avg / total       0.98      0.98      0.98        45

******************* LR ********************
training took 0.001000s!
accuracy: 88.89%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       1.00      0.72      0.84        18
        2.0       0.69      1.00      0.81        11

avg / total       0.92      0.89      0.89        45

******************* RF ********************
training took 0.019000s!
accuracy: 97.78%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       1.00      0.94      0.97        18
        2.0       0.92      1.00      0.96        11

avg / total       0.98      0.98      0.98        45

******************* DT ********************
training took 0.000000s!
accuracy: 97.78%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       1.00      0.94      0.97        18
        2.0       0.92      1.00      0.96        11

avg / total       0.98      0.98      0.98        45

******************* SVM ********************
training took 0.001000s!
accuracy: 97.78%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       1.00      0.94      0.97        18
        2.0       0.92      1.00      0.96        11

avg / total       0.98      0.98      0.98        45

******************* SVMCV ********************
Fitting 3 folds for each of 14 candidates, totalling 42 fits
kernel rbf
C 1000
verbose False
probability True
degree 3
shrinking True
max_iter -1
decision_function_shape None
random_state None
tol 0.001
cache_size 200
coef0 0.0
gamma 0.001
class_weight None
training took 0.143000s!
accuracy: 97.78%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       1.00      0.94      0.97        18
        2.0       0.92      1.00      0.96        11

avg / total       0.98      0.98      0.98        45

******************* GBDT ********************
[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    0.0s finished
training took 0.176000s!
accuracy: 97.78%
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        16
        1.0       1.00      0.94      0.97        18
        2.0       0.92      1.00      0.96        11

avg / total       0.98      0.98      0.98        45
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 203,456评论 5 477
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 85,370评论 2 381
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 150,337评论 0 337
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 54,583评论 1 273
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 63,596评论 5 365
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,572评论 1 281
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 37,936评论 3 395
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,595评论 0 258
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 40,850评论 1 297
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,601评论 2 321
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,685评论 1 329
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,371评论 4 318
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 38,951评论 3 307
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 29,934评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 31,167评论 1 259
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 43,636评论 2 349
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 42,411评论 2 342

推荐阅读更多精彩内容