Imbalanced-learn for NSL-KDD

from Tools.Plot import plot_confusion_matrix,macro_roc
from sklearn.metrics import classification_report,confusion_matrix,log_loss,auc
from sklearn.preprocessing import (
    MinMaxScaler, label_binarize, OneHotEncoder, LabelEncoder)
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV,train_test_split
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier,AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from itertools import cycle, product
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import cycle
import seaborn as sns
import warnings
from tqdm import tqdm_notebook
warnings.filterwarnings('ignore')
%matplotlib inline

RawData

##################### Load data ##########################
train = pd.read_csv('data/train_all.csv')
test = pd.read_csv('data/test_all.csv')

# Label encoding: class name -> integer id.
# NOTE(review): presumably these ids match the values stored in the
# `label_num` column; the mapping itself is never applied to the frames here.
label_dict = {
    'Normal': 0,
    'Probe': 1,
    'DoS': 2,
    'U2R': 3,
    'R2L': 4,
}

# Separate the `label_num` target from the remaining feature columns.
y_train = train['label_num']
y_test = test['label_num']
X_train = train.drop(columns=['label_num'])
X_test = test.drop(columns=['label_num'])
print('Shape of training set:', X_train.shape)
print('Shape of testing set:', X_test.shape)
# print('Columns: \n', list(X_train.columns))

# Class names ordered by their integer id, and the sorted ids themselves.
labels = sorted(label_dict, key=label_dict.get)
labels_number = sorted(label_dict.values())  # [0, 1, 2, 3, 4]

# Registry: resampling-strategy name -> (X, y) training set built with it.
train_set_dict = {'RawData': (X_train, y_train)}

Shape of training set: (125973, 15)
Shape of testing set: (22544, 15)

# Per-class sample counts of the training labels (shows the class imbalance).
train['label_num'].value_counts()

0    67343
2    45927
1    11656
4      995
3       52
Name: label_num, dtype: int64

Over-sampling

RandomOverSampler

# Simple duplication of existing samples.
from collections import Counter
from imblearn.over_sampling import RandomOverSampler

# `return_indices` was deprecated and later removed from imbalanced-learn;
# the indices of the selected rows are exposed on the fitted sampler as
# `sample_indices_` instead.
ros = RandomOverSampler(random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
indx = ros.sample_indices_
print(sorted(Counter(y_resampled).items()))
train_set_dict['RandomOverSampler'] = (X_resampled, y_resampled)

[(0, 67343), (1, 67343), (2, 67343), (3, 67343), (4, 67343)]

SMOTE

from imblearn.over_sampling import SMOTE, ADASYN

# Seed the sampler like the other seeded cells in this notebook so the
# synthetic samples (and every metric derived from them) are reproducible.
smote = SMOTE(random_state=0)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
print(sorted(Counter(y_resampled).items()))
train_set_dict['SMOTE'] = (X_resampled, y_resampled)

[(0, 67343), (1, 67343), (2, 67343), (3, 67343), (4, 67343)]

ADASYN

from imblearn.over_sampling import SMOTE, ADASYN

# Seed the sampler like the other seeded cells in this notebook so the
# synthetic samples (and every metric derived from them) are reproducible.
adasyn = ADASYN(random_state=0)
X_resampled, y_resampled = adasyn.fit_resample(X_train, y_train)
print(sorted(Counter(y_resampled).items()))
train_set_dict['ADASYN'] = (X_resampled, y_resampled)

[(0, 67343), (1, 67348), (2, 67312), (3, 67344), (4, 67304)]

BorderlineSMOTE

from imblearn.over_sampling import BorderlineSMOTE

# Borderline-SMOTE, variant 1. Seeded like the other seeded cells in this
# notebook so the resampled set is reproducible.
borderline_1 = BorderlineSMOTE(kind='borderline-1', random_state=0)
X_resampled, y_resampled = borderline_1.fit_resample(X_train, y_train)
print(sorted(Counter(y_resampled).items()))
train_set_dict['BorderlineSMOTE-1'] = (X_resampled, y_resampled)

[(0, 67343), (1, 67343), (2, 67343), (3, 67343), (4, 67343)]

from imblearn.over_sampling import BorderlineSMOTE

# Borderline-SMOTE, variant 2. Seeded like the other seeded cells in this
# notebook so the resampled set is reproducible.
borderline_2 = BorderlineSMOTE(kind='borderline-2', random_state=0)
X_resampled, y_resampled = borderline_2.fit_resample(X_train, y_train)
print(sorted(Counter(y_resampled).items()))
train_set_dict['BorderlineSMOTE-2'] = (X_resampled, y_resampled)

[(0, 67343), (1, 67343), (2, 67342), (3, 67342), (4, 67342)]

Under-sampling

ClusterCentroids

from imblearn.under_sampling import ClusterCentroids

# Under-sample with a seeded ClusterCentroids sampler, report the resulting
# per-class counts and register the training set under its own key.
cc = ClusterCentroids(random_state=0)
X_resampled, y_resampled = cc.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['ClusterCentroids'] = X_resampled, y_resampled

[(0, 52), (1, 52), (2, 52), (3, 52), (4, 52)]

RandomUnderSampler

from imblearn.under_sampling import RandomUnderSampler

# Under-sample with a seeded RandomUnderSampler, report the resulting
# per-class counts and register the training set under its own key.
rus = RandomUnderSampler(random_state=0)
X_resampled, y_resampled = rus.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['RandomUnderSampler'] = X_resampled, y_resampled

[(0, 52), (1, 52), (2, 52), (3, 52), (4, 52)]

NearMiss

from imblearn.under_sampling import NearMiss

# NearMiss, version 1.
nm1 = NearMiss(version=1)
X_resampled_nm1, y_resampled = nm1.fit_resample(X_train, y_train)
print(sorted(Counter(y_resampled).items()))
# Store the features NearMiss-1 actually selected. The previous code stored
# the stale `X_resampled` left over from an earlier cell, so this entry was
# trained on another sampler's rows.
train_set_dict['NearMiss-1'] = (X_resampled_nm1, y_resampled)

[(0, 52), (1, 52), (2, 52), (3, 52), (4, 52)]

from imblearn.under_sampling import NearMiss

# NearMiss, version 2.
nm2 = NearMiss(version=2)
X_resampled_nm2, y_resampled = nm2.fit_resample(X_train, y_train)
print(sorted(Counter(y_resampled).items()))
# Store the features NearMiss-2 actually selected. The previous code stored
# the stale `X_resampled` left over from an earlier cell, so this entry was
# trained on another sampler's rows.
train_set_dict['NearMiss-2'] = (X_resampled_nm2, y_resampled)

[(0, 52), (1, 52), (2, 52), (3, 52), (4, 52)]

# from imblearn.under_sampling import NearMiss
# nm1 = NearMiss(version=3)
# X_resampled_nm1, y_resampled = nm1.fit_resample(X_train, y_train)
# print(sorted(Counter(y_resampled).items()))
# train_set_dict['NearMiss-3'] = (X_resampled ,y_resampled)

EditedNearestNeighbours

from imblearn.under_sampling import EditedNearestNeighbours

# Clean the training set with EditedNearestNeighbours (default settings),
# report the per-class counts and register the result.
enn = EditedNearestNeighbours()
X_resampled, y_resampled = enn.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['EditedNearestNeighbours'] = X_resampled, y_resampled

[(0, 66485), (1, 11314), (2, 45818), (3, 52), (4, 898)]

RepeatedEditedNearestNeighbours

from imblearn.under_sampling import RepeatedEditedNearestNeighbours

# Clean the training set with RepeatedEditedNearestNeighbours (default
# settings), report the per-class counts and register the result.
renn = RepeatedEditedNearestNeighbours()
X_resampled, y_resampled = renn.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['RepeatedEditedNearestNeighbours'] = X_resampled, y_resampled

[(0, 66419), (1, 11256), (2, 45806), (3, 52), (4, 898)]

AllKNN

from imblearn.under_sampling import AllKNN

# Clean the training set with AllKNN (default settings), report the
# per-class counts and register the result.
allknn = AllKNN()
X_resampled, y_resampled = allknn.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['AllKNN'] = X_resampled, y_resampled

[(0, 66695), (1, 11360), (2, 45808), (3, 52), (4, 902)]

CondensedNearestNeighbour

from imblearn.under_sampling import CondensedNearestNeighbour

# Under-sample with a seeded CondensedNearestNeighbour sampler, report the
# per-class counts and register the result.
cnn = CondensedNearestNeighbour(random_state=0)
X_resampled, y_resampled = cnn.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['CondensedNearestNeighbour'] = X_resampled, y_resampled

[(0, 139), (1, 43), (2, 20), (3, 52), (4, 33)]

OneSidedSelection

from imblearn.under_sampling import OneSidedSelection

# Under-sample with a seeded OneSidedSelection sampler, report the
# per-class counts and register the result.
oss = OneSidedSelection(random_state=0)
X_resampled, y_resampled = oss.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['OneSidedSelection'] = X_resampled, y_resampled

[(0, 29210), (1, 8530), (2, 11655), (3, 52), (4, 713)]

NeighbourhoodCleaningRule

from imblearn.under_sampling import NeighbourhoodCleaningRule

# Clean the training set with NeighbourhoodCleaningRule (default settings),
# report the per-class counts and register the result.
ncr = NeighbourhoodCleaningRule()
X_resampled, y_resampled = ncr.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['NeighbourhoodCleaningRule'] = X_resampled, y_resampled

[(0, 66968), (1, 11470), (2, 45859), (3, 52), (4, 909)]

InstanceHardnessThreshold

from sklearn.linear_model import LogisticRegression
from imblearn.under_sampling import InstanceHardnessThreshold

# Under-sample with InstanceHardnessThreshold, using a logistic regression
# as the estimator handed to the sampler.
hardness_estimator = LogisticRegression(solver='lbfgs', multi_class='auto')
iht = InstanceHardnessThreshold(random_state=0, estimator=hardness_estimator)
X_resampled, y_resampled = iht.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['InstanceHardnessThreshold'] = X_resampled, y_resampled

[(0, 62), (1, 52), (2, 52), (3, 52), (4, 52)]

Over- and under-sampling

SMOTEENN

from imblearn.combine import SMOTEENN

# Combined over- and under-sampling with a seeded SMOTEENN sampler; report
# the per-class counts and register the result.
smote_enn = SMOTEENN(random_state=0)
X_resampled, y_resampled = smote_enn.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['SMOTEENN'] = X_resampled, y_resampled

[(0, 66114), (1, 67013), (2, 67212), (3, 67194), (4, 66959)]

SMOTETomek

from imblearn.combine import SMOTETomek

# Combined over- and under-sampling with a seeded SMOTETomek sampler; report
# the per-class counts and register the result.
smote_tomek = SMOTETomek(random_state=0)
X_resampled, y_resampled = smote_tomek.fit_resample(X_train, y_train)
class_counts = Counter(y_resampled)
print(sorted(class_counts.items()))
train_set_dict['SMOTETomek'] = X_resampled, y_resampled

[(0, 67231), (1, 67289), (2, 67323), (3, 67323), (4, 67289)]

Bagging

# The 'Bagging' entry uses the untouched training split; the evaluation loop
# picks the classifier for this entry by its key.
raw_counts = Counter(y_train)
print(sorted(raw_counts.items()))
train_set_dict['Bagging'] = X_train, y_train

[(0, 67343), (1, 11656), (2, 45927), (3, 52), (4, 995)]

BalancedBagging

# The 'BalancedBagging' entry uses the untouched training split; the
# evaluation loop picks the classifier for this entry by its key.
raw_counts = Counter(y_train)
print(sorted(raw_counts.items()))
train_set_dict['BalancedBagging'] = X_train, y_train

[(0, 67343), (1, 11656), (2, 45927), (3, 52), (4, 995)]

BalancedRandomForest

# The 'BalancedRandomForest' entry uses the untouched training split; the
# evaluation loop picks the classifier for this entry by its key.
raw_counts = Counter(y_train)
print(sorted(raw_counts.items()))
train_set_dict['BalancedRandomForest'] = X_train, y_train

[(0, 67343), (1, 11656), (2, 45927), (3, 52), (4, 995)]

Boosting

RUSBoost

# The 'RUSBoost' entry uses the untouched training split; the evaluation
# loop picks the classifier for this entry by its key.
raw_counts = Counter(y_train)
print(sorted(raw_counts.items()))
train_set_dict['RUSBoost'] = X_train, y_train

[(0, 67343), (1, 11656), (2, 45927), (3, 52), (4, 995)]

EasyEnsemble

# The 'EasyEnsemble' entry uses the untouched training split; the evaluation
# loop picks the classifier for this entry by its key.
raw_counts = Counter(y_train)
print(sorted(raw_counts.items()))
train_set_dict['EasyEnsemble'] = X_train, y_train

[(0, 67343), (1, 11656), (2, 45927), (3, 52), (4, 995)]

评估

from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from imblearn.metrics import classification_report_imbalanced
from imblearn.ensemble import (
    BalancedBaggingClassifier,
    BalancedRandomForestClassifier,
    EasyEnsembleClassifier,
    RUSBoostClassifier,
)

# Seed of the RandomForest used for every pre-resampled training set.
RANDOM_STATE = 2019


def _build_classifier(name):
    """Return an unfitted classifier for the training set registered as `name`.

    The ensemble entries ('Bagging', 'BalancedBagging',
    'BalancedRandomForest', 'RUSBoost', 'EasyEnsemble') get their dedicated
    estimator; every other entry gets the same RandomForest configuration.
    """
    if name == 'Bagging':
        # The base estimator is passed positionally: the keyword was renamed
        # from `base_estimator` to `estimator` in recent scikit-learn, but it
        # is the first parameter under either name.
        return BaggingClassifier(DecisionTreeClassifier(), random_state=0)
    if name == 'BalancedBagging':
        # Positional base estimator for the same reason as above.
        return BalancedBaggingClassifier(DecisionTreeClassifier(),
                                         sampling_strategy='auto',
                                         replacement=False,
                                         random_state=0)
    if name == 'BalancedRandomForest':
        return BalancedRandomForestClassifier(n_estimators=100, random_state=0)
    if name == 'RUSBoost':
        return RUSBoostClassifier(random_state=0)
    if name == 'EasyEnsemble':
        return EasyEnsembleClassifier(random_state=0)
    # Default model for all resampled training sets.
    return RandomForestClassifier(n_estimators=161,
                                  max_depth=49,
                                  max_features="sqrt",
                                  random_state=RANDOM_STATE)


################### train #####################################
plt.figure(figsize=(12, 6))
# Set once, after the figure exists and before the first curve is drawn
# (previously re-applied on every loop iteration).
sns.set_style('darkgrid')
cm = []               # (name, confusion matrix) pairs, in training-set order
clf_report_list = []  # imbalanced classification reports, same order
for name, (X, y) in tqdm_notebook(list(train_set_dict.items())):
    clf = _build_classifier(name)
    clf.fit(X, y)
    y_test_pred = clf.predict(X_test)
    y_test_score = clf.predict_proba(X_test)  # per-class scores for the ROC

    ######################### test-set evaluation ########################
    # Classification report
    clf_report = classification_report_imbalanced(
        y_test, y_test_pred, digits=4, target_names=labels)
    clf_report_list.append(clf_report)
    # Confusion matrix
    cnf_matrix = confusion_matrix(y_test, y_test_pred)
    cm.append((name, cnf_matrix))
    # ROC: `macro_roc` is a project helper; its two return values are used
    # here as the x/y points of the curve and fed to `auc`.
    all_fpr, mean_tpr = macro_roc(
        y_test, y_test_score, labels_number)
    roc_auc = auc(all_fpr, mean_tpr)

    plt.plot(all_fpr, mean_tpr, lw=1,
             label='{0} (auc = {1:0.4f})'.format(name, roc_auc))

# Diagonal reference line and figure decoration.
plt.plot([0, 1], [0, 1], 'k--', lw=1)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.01])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')
plt.legend(loc=(1.1, 0))
plt.tight_layout()
plt.show()

output_55_2.png

# Draw one normalized confusion matrix per evaluated training set.
sns.set_style('white')
# Derive the grid from the number of matrices instead of hard-coding 6x4, so
# extra entries are not silently dropped by `zip` and missing ones do not
# leave empty axes. For 24 entries this is the original 6x4, 20x20 layout.
n_cols = 4
n_rows = max(1, (len(cm) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols,
                         figsize=(20, 20 * n_rows / 6),
                         squeeze=False)
flat_axes = axes.ravel()
for ax, (name, cnf_matrix) in zip(flat_axes, cm):
    plot_confusion_matrix(cnf_matrix, labels, ax=ax,
                          normalize=True,
                          title=name,
                          cmap=plt.cm.Blues)
# Hide any cells of the grid that received no matrix.
for unused_ax in flat_axes[len(cm):]:
    unused_ax.axis('off')
plt.tight_layout()
plt.show()

output_56_0.png

# Print every stored report under the name of its training set; reports were
# appended in the iteration order of `train_set_dict`.
for name, clf_report in zip(train_set_dict, clf_report_list):
    print(name, '\n', clf_report)

RawData 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6489    0.9712    0.6024    0.7780    0.7648    0.6066      9711
      Probe     0.8700    0.5808    0.9896    0.6966    0.7581    0.5512      2421
        DoS     0.9616    0.7995    0.9842    0.8731    0.8871    0.7724      7458
        U2R     0.8000    0.0200    1.0000    0.0390    0.1414    0.0180       200
        R2L     0.8936    0.0610    0.9990    0.1142    0.2469    0.0552      2754

avg / total     0.8073    0.7528    0.8222    0.7131    0.7358    0.5829     22544

RandomOverSampler 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6482    0.9683    0.6023    0.7765    0.7637    0.6045      9711
      Probe     0.8523    0.5865    0.9878    0.6949    0.7612    0.5561      2421
        DoS     0.9615    0.7900    0.9844    0.8674    0.8819    0.7626      7458
        U2R     0.2143    0.0300    0.9990    0.0526    0.1731    0.0271       200
        R2L     0.8930    0.0697    0.9988    0.1293    0.2639    0.0632      2754

avg / total     0.7998    0.7502    0.8220    0.7123    0.7362    0.5803     22544

SMOTE 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6606    0.9679    0.6238    0.7853    0.7770    0.6245      9711
      Probe     0.8471    0.5857    0.9873    0.6926    0.7604    0.5550      2421
        DoS     0.9620    0.8081    0.9842    0.8784    0.8918    0.7814      7458
        U2R     0.1818    0.0500    0.9980    0.0784    0.2234    0.0452       200
        R2L     0.9102    0.1068    0.9985    0.1911    0.3265    0.0971      2754

avg / total     0.8066    0.7606    0.8312    0.7273    0.7533    0.5994     22544

ADASYN 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6537    0.9683    0.6118    0.7805    0.7697    0.6135      9711
      Probe     0.8411    0.5989    0.9864    0.6996    0.7686    0.5679      2421
        DoS     0.9610    0.7800    0.9844    0.8611    0.8762    0.7521      7458
        U2R     0.0838    0.0750    0.9927    0.0792    0.2729    0.0676       200
        R2L     0.9113    0.0672    0.9991    0.1251    0.2591    0.0609      2754

avg / total     0.8019    0.7483    0.8260    0.7122    0.7380    0.5821     22544

BorderlineSMOTE-1 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6443    0.9700    0.5948    0.7743    0.7596    0.5986      9711
      Probe     0.8586    0.5869    0.9884    0.6973    0.7617    0.5568      2421
        DoS     0.9603    0.7721    0.9842    0.8560    0.8717    0.7438      7458
        U2R     0.0694    0.0500    0.9940    0.0581    0.2229    0.0450       200
        R2L     0.8915    0.0418    0.9993    0.0798    0.2043    0.0377      2754

avg / total     0.7970    0.7418    0.8188    0.7019    0.7243    0.5687     22544

BorderlineSMOTE-2 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6613    0.9694    0.6243    0.7863    0.7780    0.6261      9711
      Probe     0.8431    0.6258    0.9860    0.7183    0.7855    0.5948      2421
        DoS     0.9623    0.8018    0.9845    0.8748    0.8885    0.7750      7458
        U2R     0.0694    0.0500    0.9940    0.0581    0.2229    0.0450       200
        R2L     0.8961    0.0501    0.9992    0.0949    0.2238    0.0453      2754

avg / total     0.8039    0.7566    0.8314    0.7173    0.7427    0.5959     22544

ClusterCentroids 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6393    0.3194    0.8636    0.4260    0.5252    0.2609      9711
      Probe     0.6794    0.4936    0.9720    0.5718    0.6926    0.4568      2421
        DoS     0.9756    0.6652    0.9918    0.7910    0.8122    0.6382      7458
        U2R     0.0175    0.7250    0.6356    0.0342    0.6788    0.4649       200
        R2L     0.2496    0.2320    0.9029    0.2405    0.4577    0.1954      2754

avg / total     0.7018    0.4454    0.9204    0.5363    0.6313    0.4005     22544

RandomUnderSampler 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6559    0.9543    0.6211    0.7774    0.7699    0.6125      9711
      Probe     0.8116    0.6316    0.9824    0.7103    0.7877    0.5987      2421
        DoS     0.9697    0.7212    0.9889    0.8272    0.8445    0.6941      7458
        U2R     0.1129    0.4200    0.9705    0.1780    0.6384    0.3852       200
        R2L     0.6458    0.0563    0.9957    0.1035    0.2367    0.0508      2754

avg / total     0.7704    0.7281    0.8304    0.6991    0.7302    0.5674     22544

NearMiss-1 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6559    0.9543    0.6211    0.7774    0.7699    0.6125      9711
      Probe     0.8116    0.6316    0.9824    0.7103    0.7877    0.5987      2421
        DoS     0.9697    0.7212    0.9889    0.8272    0.8445    0.6941      7458
        U2R     0.1129    0.4200    0.9705    0.1780    0.6384    0.3852       200
        R2L     0.6458    0.0563    0.9957    0.1035    0.2367    0.0508      2754

avg / total     0.7704    0.7281    0.8304    0.6991    0.7302    0.5674     22544

NearMiss-2 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6559    0.9543    0.6211    0.7774    0.7699    0.6125      9711
      Probe     0.8116    0.6316    0.9824    0.7103    0.7877    0.5987      2421
        DoS     0.9697    0.7212    0.9889    0.8272    0.8445    0.6941      7458
        U2R     0.1129    0.4200    0.9705    0.1780    0.6384    0.3852       200
        R2L     0.6458    0.0563    0.9957    0.1035    0.2367    0.0508      2754

avg / total     0.7704    0.7281    0.8304    0.6991    0.7302    0.5674     22544

EditedNearestNeighbours 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6375    0.9712    0.5821    0.7697    0.7519    0.5873      9711
      Probe     0.8703    0.5820    0.9896    0.6975    0.7589    0.5524      2421
        DoS     0.9608    0.7664    0.9846    0.8527    0.8687    0.7381      7458
        U2R     0.5000    0.0200    0.9998    0.0385    0.1414    0.0180       200
        R2L     0.9023    0.0570    0.9991    0.1072    0.2387    0.0516      2754

avg / total     0.8006    0.7415    0.8136    0.7020    0.7232    0.5630     22544

RepeatedEditedNearestNeighbours 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6395    0.9708    0.5859    0.7711    0.7542    0.5907      9711
      Probe     0.8699    0.5799    0.9896    0.6959    0.7575    0.5504      2421
        DoS     0.9603    0.7723    0.9842    0.8561    0.8719    0.7440      7458
        U2R     0.4444    0.0200    0.9998    0.0383    0.1414    0.0180       200
        R2L     0.9066    0.0599    0.9991    0.1124    0.2447    0.0542      2754

avg / total     0.8013    0.7434    0.8152    0.7042    0.7258    0.5665     22544

AllKNN 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6394    0.9712    0.5855    0.7711    0.7541    0.5906      9711
      Probe     0.8719    0.5849    0.9897    0.7001    0.7608    0.5554      2421
        DoS     0.9609    0.7706    0.9845    0.8553    0.8710    0.7424      7458
        U2R     0.4444    0.0200    0.9998    0.0383    0.1414    0.0180       200
        R2L     0.8944    0.0585    0.9990    0.1097    0.2417    0.0529      2754

avg / total     0.8001    0.7434    0.8151    0.7040    0.7254    0.5663     22544

CondensedNearestNeighbour 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.5797    0.7469    0.5902    0.6527    0.6639    0.4477      9711
      Probe     0.7732    0.5324    0.9812    0.6306    0.7228    0.4990      2421
        DoS     0.8747    0.7675    0.9456    0.8176    0.8519    0.7129      7458
        U2R     0.0036    0.0300    0.9252    0.0064    0.1666    0.0253       200
        R2L     0.7972    0.0414    0.9985    0.0787    0.2033    0.0374      2754

avg / total     0.7195    0.6381    0.8026    0.6290    0.6718    0.4871     22544

OneSidedSelection 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.5274    0.6492    0.5597    0.5820    0.6028    0.3666      9711
      Probe     0.8380    0.5514    0.9872    0.6652    0.7378    0.5206      2421
        DoS     0.6400    0.7647    0.7874    0.6968    0.7759    0.6007      7458
        U2R     0.6667    0.0200    0.9999    0.0388    0.1414    0.0180       200
        R2L     0.8250    0.0240    0.9993    0.0466    0.1548    0.0216      2754

avg / total     0.6356    0.5949    0.7385    0.5587    0.6157    0.4154     22544

NeighbourhoodCleaningRule 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6363    0.9712    0.5800    0.7689    0.7505    0.5853      9711
      Probe     0.8666    0.5795    0.9893    0.6946    0.7572    0.5498      2421
        DoS     0.9605    0.7624    0.9845    0.8501    0.8664    0.7339      7458
        U2R     0.5000    0.0200    0.9998    0.0385    0.1414    0.0180       200
        R2L     0.8920    0.0570    0.9990    0.1072    0.2386    0.0516      2754

avg / total     0.7983    0.7399    0.8127    0.7004    0.7216    0.5604     22544

InstanceHardnessThreshold 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.8535    0.6872    0.9108    0.7614    0.7911    0.6119      9711
      Probe     0.3513    0.5316    0.8819    0.4230    0.6847    0.4524      2421
        DoS     0.9386    0.1947    0.9937    0.3225    0.4398    0.1780      7458
        U2R     0.0224    0.9800    0.6177    0.0439    0.7780    0.6273       200
        R2L     0.2162    0.0610    0.9692    0.0952    0.2432    0.0538      2754

avg / total     0.7425    0.4336    0.9396    0.4921    0.5964    0.3832     22544

SMOTEENN 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6473    0.9682    0.6008    0.7759    0.7627    0.6030      9711
      Probe     0.8378    0.5890    0.9863    0.6917    0.7622    0.5579      2421
        DoS     0.9634    0.7654    0.9856    0.8530    0.8685    0.7377      7458
        U2R     0.2188    0.0700    0.9978    0.1061    0.2643    0.0634       200
        R2L     0.8841    0.1053    0.9981    0.1882    0.3242    0.0957      2754

avg / total     0.7975    0.7470    0.8216    0.7146    0.7397    0.5760     22544

SMOTETomek 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6505    0.9681    0.6063    0.7781    0.7661    0.6082      9711
      Probe     0.8449    0.5849    0.9871    0.6912    0.7598    0.5541      2421
        DoS     0.9610    0.7761    0.9844    0.8587    0.8741    0.7481      7458
        U2R     0.2034    0.0600    0.9979    0.0927    0.2447    0.0543       200
        R2L     0.9159    0.1107    0.9986    0.1976    0.3326    0.1008      2754

avg / total     0.8025    0.7506    0.8237    0.7184    0.7436    0.5818     22544

Bagging 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6558    0.9707    0.6145    0.7828    0.7723    0.6177      9711
      Probe     0.8632    0.5993    0.9886    0.7075    0.7697    0.5694      2421
        DoS     0.9640    0.8013    0.9852    0.8752    0.8885    0.7749      7458
        U2R     0.5000    0.0200    0.9998    0.0385    0.1414    0.0180       200
        R2L     0.9046    0.0930    0.9986    0.1686    0.3047    0.0844      2754

avg / total     0.8091    0.7591    0.8277    0.7236    0.7478    0.5941     22544

BalancedBagging 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6632    0.9445    0.6370    0.7792    0.7757    0.6202      9711
      Probe     0.7238    0.6105    0.9720    0.6623    0.7703    0.5719      2421
        DoS     0.9530    0.7224    0.9824    0.8218    0.8424    0.6913      7458
        U2R     0.1089    0.4150    0.9696    0.1726    0.6343    0.3801       200
        R2L     0.6445    0.0599    0.9954    0.1096    0.2442    0.0541      2754

avg / total     0.7584    0.7224    0.8340    0.6936    0.7310    0.5672     22544

BalancedRandomForest 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.6765    0.9377    0.6606    0.7859    0.7871    0.6366      9711
      Probe     0.7515    0.7084    0.9718    0.7293    0.8297    0.6703      2421
        DoS     0.9683    0.6912    0.9888    0.8066    0.8267    0.6631      7458
        U2R     0.1284    0.4950    0.9699    0.2039    0.6929    0.4573       200
        R2L     0.6671    0.1710    0.9881    0.2723    0.4111    0.1552      2754

avg / total     0.7751    0.7339    0.8454    0.7188    0.7580    0.5886     22544

RUSBoost 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.5100    0.8751    0.3637    0.6444    0.5641    0.3345      9711
      Probe     0.2858    0.3073    0.9076    0.2962    0.5281    0.2622      2421
        DoS     0.9864    0.2241    0.9985    0.3652    0.4730    0.2064      7458
        U2R     0.0359    0.0450    0.9892    0.0399    0.2110    0.0403       200
        R2L     0.5165    0.2498    0.9675    0.3368    0.4916    0.2243      2754

avg / total     0.6401    0.5150    0.7114    0.4717    0.5181    0.2683     22544

EasyEnsemble 
                    pre       rec       spe        f1       geo       iba       sup

     Normal     0.7402    0.8738    0.7679    0.8015    0.8191    0.6781      9711
      Probe     0.2319    0.3792    0.8489    0.2878    0.5674    0.3068      2421
        DoS     0.7588    0.3623    0.9431    0.4904    0.5845    0.3218      7458
        U2R     0.0578    0.5550    0.9191    0.1048    0.7142    0.4915       200
        R2L     0.4072    0.2429    0.9508    0.3043    0.4806    0.2146      2754

avg / total     0.6450    0.5715    0.8582    0.5765    0.6722    0.4621     22544

最后编辑于：2019.01.24 13:08:42

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 199,711评论 5赞 468
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 83,932评论 2赞 376
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 146,770评论 0赞 330
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 53,799评论 1赞 271
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 62,697评论 5赞 359
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 48,069评论 1赞 276
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 37,535评论 3赞 390
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 36,200评论 0赞 254
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 40,353评论 1赞 294
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 35,290评论 2赞 317
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 37,331评论 1赞 329
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 33,020评论 3赞 315
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 38,610评论 3赞 303
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 29,694评论 0赞 19
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 30,927评论 1赞 255
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 42,330评论 2赞 346
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 41,904评论 2赞 341

Imbalanced-learn for NSL-KDD

RawData

Over-sampling

RandomOverSampler

SMOTE

ADASYN

BorderlineSMOTE

Under-sampling

ClusterCentroids

RandomUnderSampler

NearMiss

EditedNearestNeighbours

RepeatedEditedNearestNeighbours

AllKNN

CondensedNearestNeighbour

OneSidedSelection

NeighbourhoodCleaningRule

InstanceHardnessThreshold

Over- and under-sampling

SMOTEENN

SMOTETomek

Bagging

Bagging

BalancedBagging

BalancedRandomForest

Boosting

RUSBoost

EasyEnsemble

评估

推荐阅读更多精彩内容