神经网络可以看作逻辑回归的推广:没有隐藏层、输出层使用 sigmoid 激活函数的神经网络等价于逻辑回归。
在 Python 中使用神经网络,除了从 sklearn 导入的模型类不同以外,其他的原则(数据预处理、训练、预测)都是相同的。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
-
导入数据
# Load the training data set. The relative path uses forward slashes, which
# Python accepts on Windows as well as on POSIX systems; the previous
# backslash-separated literal only resolved on Windows.
data = pd.read_csv('PDM/xx/4.5/data.csv')
-
删除含有空值(缺失值)的行
# Discard every row that has a missing value in any column
# (axis=0 and how='any' are the pandas defaults, spelled out here).
data = data.dropna(axis=0, how='any')
-
将文本型(类别)数据转换为离散的数值型数据
# Encode every text (categorical) column of `data` as integers and store the
# result in a new "... Map" column. The mapping dicts stay at module level
# because the same encoding is applied again to the prediction data later on.
#
# NOTE(review): Series.map yields NaN for any category missing from its
# dict -- confirm that each dict covers all values present in the CSV.
# NOTE(review): the codes given to the nominal columns (internet connection,
# marital status, movie selector, format, TV signal) impose an arbitrary
# order; one-hot encoding may suit the model better -- confirm.

# Education level: Grade School = 1 up to Post-Doc = 9.
educationLevelDict = {
    'Post-Doc': 9,
    'Doctorate': 8,
    "Master's Degree": 7,
    "Bachelor's Degree": 6,
    "Associate's Degree": 5,
    'Some College': 4,
    'Trade School': 3,
    'High School': 2,
    'Grade School': 1,
}
data['Education Level Map'] = data['Education Level'].map(educationLevelDict)

# Print the distinct gender labels found in the data.
for m in data['Gender'].unique():
    print(m)

genderdict = {
    'Male': 1,
    'Female': 0,
}
data['Gender Level Map'] = data['Gender'].map(genderdict)

# Home ownership is the column the model is trained to predict.
HomeOwnershipdict = {
    'Own': 1,
    'Rent': 0,
}
data['Home Ownership Level Map'] = data['Home Ownership'].map(HomeOwnershipdict)

InternetConnectiondict = {
    'Dial-Up': 1,
    'DSL': 2,
    'Cable Modem': 3,
    'No Internet Connection': 4,
    'Other': 5,
    'IDSN': 6,
}
data['Internet Connection Level Map'] = data['Internet Connection'].map(InternetConnectiondict)

MaritalStatusdict = {
    'Married': 1,
    'Divorced': 2,
    'Never Married': 3,
    'Separated': 4,
    'Other': 5,
}
data['Marital Status Level Map'] = data['Marital Status'].map(MaritalStatusdict)

MovieSelectordict = {
    'Spouse/Partner': 1,
    'Me': 2,
    'Other': 3,
    'Children': 4,
}
data['Movie Selector Level Map'] = data['Movie Selector'].map(MovieSelectordict)

PrerecFormatdict = {
    'DVD': 1,
    'VHS': 2,
    'Betamax': 3,
    'Laserdisk': 4,
    'Video CD': 5,
    'Other': 6,
}
data['Prerec Format Level Map'] = data['Prerec Format'].map(PrerecFormatdict)

TVSignaldict = {
    'Cable': 1,
    'Digital Satellite': 2,
    'Analog antennae': 3,
    "Don't watch TV": 4,
    'Analog Satellite': 5,
}
data['TV Signal Level Map'] = data['TV Signal'].map(TVSignaldict)

# All frequency columns share one scale: Never = 0 up to Daily = 4.
freqMap = {
    'Never': 0,
    'Rarely': 1,
    'Monthly': 2,
    'Weekly': 3,
    'Daily': 4,
}
data['PPV Freq Map'] = data['PPV Freq'].map(freqMap)
data['Theater Freq Map'] = data['Theater Freq'].map(freqMap)
data['TV Movie Freq Map'] = data['TV Movie Freq'].map(freqMap)
data['Prerec Buying Freq Map'] = data['Prerec Buying Freq'].map(freqMap)
data['Prerec Renting Freq Map'] = data['Prerec Renting Freq'].map(freqMap)
data['Prerec Viewing Freq Map'] = data['Prerec Viewing Freq'].map(freqMap)
-
建立新的输入数据矩阵(纯数值型数据)
# Feature columns fed to the model, assembled group by group (all numeric).
# Raw numeric columns taken straight from the CSV:
dummiesSelect = [
    'Age', 'Num Bathrooms', 'Num Bedrooms',
    'Num Cars', 'Num Children', 'Num TVs',
]
# Education level and the frequency columns after integer encoding:
dummiesSelect += [
    'Education Level Map',
    'PPV Freq Map', 'Theater Freq Map', 'TV Movie Freq Map',
    'Prerec Buying Freq Map', 'Prerec Renting Freq Map',
    'Prerec Viewing Freq Map',
]
# Remaining categorical columns after integer encoding:
dummiesSelect += [
    'Gender Level Map',
    'Internet Connection Level Map',
    'Marital Status Level Map',
    'Movie Selector Level Map',
    'Prerec Format Level Map',
    'TV Signal Level Map',
]

# Input matrix: only the selected feature columns, in the order listed above.
inputData = data[dummiesSelect]
-
建立输出数据矩阵
# Target matrix: a one-column DataFrame holding the encoded
# 'Home Ownership' value of every row.
outputData = data.loc[:, ['Home Ownership Level Map']]
-
导入计算模型,并依次用 1~10 个隐藏层节点训练,输出各模型在训练集上的准确率
from sklearn.neural_network import MLPClassifier

# scikit-learn expects a 1-D label array; passing the one-column DataFrame
# directly raises a DataConversionWarning on every fit.
trainingLabels = outputData.values.ravel()

# Train one network per hidden-layer width (1 to 10 units, a single hidden
# layer each) and print "<units>, <accuracy>" for every width.
# NOTE(review): the accuracy is measured on the training data itself, so it
# does not show how well the model generalises -- consider a held-out split.
for hiddenUnits in range(1, 11):
    ANNModel = MLPClassifier(
        activation='relu',
        hidden_layer_sizes=(hiddenUnits,),
    )
    ANNModel.fit(inputData, trainingLabels)
    score = ANNModel.score(inputData, trainingLabels)
    print(str(hiddenUnits) + ", " + str(score))
# After the loop, ANNModel is the last network trained (10 hidden units).
预测
-
导入预测数据
# Load the data to run predictions on. Forward slashes keep the relative
# path usable on both Windows and POSIX systems; the previous
# backslash-separated literal only resolved on Windows.
# NOTE(review): this is the same file the model was trained on -- confirm
# whether a separate prediction file was intended.
newData = pd.read_csv('PDM/xx/4.5/data.csv')
-
对预测数据进行预处理:删除含有空值的行
# Discard every prediction row that has a missing value in any column
# (axis=0 and how='any' are the pandas defaults, spelled out here).
newData = newData.dropna(axis=0, how='any')
-
建立纯数值输入矩阵
# Apply the training-time encodings to the prediction data.
# Each entry is (source column, encoded column, mapping dict); the entries
# are processed in order, so the new columns are appended in this order.
for sourceColumn, encodedColumn, mapping in (
    ('Education Level', 'Education Level Map', educationLevelDict),
    ('Gender', 'Gender Level Map', genderdict),
    ('Home Ownership', 'Home Ownership Level Map', HomeOwnershipdict),
    ('Internet Connection', 'Internet Connection Level Map', InternetConnectiondict),
    ('Marital Status', 'Marital Status Level Map', MaritalStatusdict),
    ('Movie Selector', 'Movie Selector Level Map', MovieSelectordict),
    ('Prerec Format', 'Prerec Format Level Map', PrerecFormatdict),
    ('TV Signal', 'TV Signal Level Map', TVSignaldict),
    ('PPV Freq', 'PPV Freq Map', freqMap),
    ('Theater Freq', 'Theater Freq Map', freqMap),
    ('TV Movie Freq', 'TV Movie Freq Map', freqMap),
    ('Prerec Buying Freq', 'Prerec Buying Freq Map', freqMap),
    ('Prerec Renting Freq', 'Prerec Renting Freq Map', freqMap),
    ('Prerec Viewing Freq', 'Prerec Viewing Freq Map', freqMap),
):
    newData[encodedColumn] = newData[sourceColumn].map(mapping)

# Keep only the feature columns the model was trained on, in the same order.
inputNewData = newData[dummiesSelect]
-
预测
# Predict the encoded class for every row of the new input matrix.
# The result is stored and printed: as a bare expression it was computed
# and then silently discarded when the file ran as a script.
predictedOwnership = ANNModel.predict(inputNewData)
print(predictedOwnership)