机器学习-线性回归代码

1. 简单线性回归手动实现

import numpy as np
import matplotlib.pyplot as plt

class LinerRegression(object):
    """Univariate linear regression (y = w*x + b) trained by full-batch
    gradient descent on the mean-squared-error loss.

    NOTE: the (misspelled) class name is kept as-is so existing callers
    keep working.
    """

    def __init__(self, learning_rate=0.01, max_iter=100, seed=None):
        """
        Args:
            learning_rate: gradient-descent step size.
            max_iter: number of full-batch update steps run by fit().
            seed: optional seed for the random parameter initialisation.
        """
        np.random.seed(seed)
        self.lr = learning_rate
        self.max_iter = max_iter
        # Parameters start near 1 with a little Gaussian noise.
        self.w = np.random.normal(1, 0.1)
        self.b = np.random.normal(1, 0.1)
        self.loss_arr = []  # per-iteration training loss, filled by fit()

    def fit(self, x, y):
        """Run max_iter gradient-descent steps on the (x, y) training data."""
        self.x = x
        self.y = y
        for _ in range(self.max_iter):
            self._train_step()
            self.loss_arr.append(self.loss())

    def _f(self, x, w, b):
        """Model function: element-wise w*x + b."""
        return x * w + b

    def predict(self, x=None):
        """Predict targets for x; defaults to the stored training inputs."""
        if x is None:
            x = self.x
        return self._f(x, self.w, self.b)

    def loss(self, y_true=None, y_pred=None):
        """Mean squared error between y_true and y_pred.

        Each argument defaults independently (fix: the original replaced
        BOTH whenever either was None, silently discarding a
        caller-supplied y_true).
        """
        if y_true is None:
            y_true = self.y
        if y_pred is None:
            y_pred = self.predict(self.x)
        return np.mean((y_true - y_pred) ** 2)

    def _calc_gradient(self):
        """Gradients of the MSE loss w.r.t. w and b."""
        # Hoist the shared residual instead of computing it twice.
        residual = self.x * self.w + self.b - self.y
        d_w = np.mean(residual * self.x)
        d_b = np.mean(residual)
        return d_w, d_b

    def _train_step(self):
        """One gradient-descent update; returns the new (w, b)."""
        d_w, d_b = self._calc_gradient()
        self.w -= self.lr * d_w
        self.b -= self.lr * d_b
        return self.w, self.b

def show_data(x, y, w=None, b=None):
    """Scatter the (x, y) points; overlay the line y = w*x + b in red
    when both parameters are supplied."""
    plt.scatter(x, y, marker='.')
    if not (w is None or b is None):
        plt.plot(x, w * x + b, c='red')
    plt.show()


# Generate synthetic data: y = 20*x + 10 plus N(0, 10) Gaussian noise.
np.random.seed(272)
data_size = 100
x = np.random.uniform(low=1.0, high=10.0, size=data_size)
y = x * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=data_size)

# plt.scatter(x, y, marker='.')
# plt.show()

# Shuffle, then split 70/30 into train / test sets.
shuffled_index = np.random.permutation(data_size)
x = x[shuffled_index]
y = y[shuffled_index]
split_index = int(data_size * 0.7)
x_train = x[:split_index]
y_train = y[:split_index]
x_test = x[split_index:]
y_test = y[split_index:]

# Visualise the splits (disabled).
# plt.scatter(x_train, y_train, marker='.')
# plt.show()
# plt.scatter(x_test, y_test, marker='.')
# plt.show()

# Train the hand-rolled model and report the fitted parameters.
regr = LinerRegression(learning_rate=0.01, max_iter=10, seed=314)
regr.fit(x_train, y_train)
print('cost: \t{:.3}'.format(regr.loss()))
print('w: \t{:.3}'.format(regr.w))
print('b: \t{:.3}'.format(regr.b))
show_data(x, y, regr.w, regr.b)

# Plot the per-iteration training loss.
plt.scatter(np.arange(len(regr.loss_arr)), regr.loss_arr, marker='o', c='green')
plt.show()

打印结果:

cost: 1.06e+02

w: 20.8

b: 4.21

手动线性回归

2. sklearn实现各种线性回归

import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, RidgeCV, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
from terminaltables import AsciiTable

# Configure a CJK-capable font so Chinese titles/labels render correctly.
# Import only the name actually used instead of a wildcard import.
from matplotlib.font_manager import FontProperties

# Raw string: the original plain string relied on the invalid escape
# sequences "\W" and "\F" being passed through, which raises a
# DeprecationWarning (and eventually an error) on modern Python.
myfont = FontProperties(fname=r'C:\Windows\Fonts\simhei.ttf')


# Generate synthetic data: y = 20*x + 10 plus N(0, 10) Gaussian noise.
np.random.seed(272)
data_size = 100
x = np.random.uniform(low=1.0, high=10.0, size=data_size)
y = x * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=data_size)

# plt.scatter(x, y, marker='.')
# plt.show()

# Shuffle, then split 70/30; reshape to (n, 1) column vectors as sklearn
# estimators expect 2-D feature arrays.
shuffled_index = np.random.permutation(data_size)
x = x[shuffled_index]
y = y[shuffled_index]
split_index = int(data_size * 0.7)
x_train = x[:split_index].reshape(-1, 1)
y_train = y[:split_index].reshape(-1, 1)
x_test = x[split_index:].reshape(-1, 1)
y_test = y[split_index:].reshape(-1, 1)

# Plot the data plus (optionally) a fitted line, with localised labels.
def show_data(x, y, w=None, b=None, title='', xlabel='', ylabel=''):
    """Scatter (x, y); overlay y = w*x + b in red when both are given.

    Title and axis labels are rendered with the module-level CJK font
    (myfont) so Chinese text displays correctly.
    """
    plt.scatter(x, y, marker='.')
    if not (w is None or b is None):
        plt.plot(x, w * x + b, c='red')

    for setter, text in ((plt.title, title), (plt.xlabel, xlabel), (plt.ylabel, ylabel)):
        setter(text, fontproperties=myfont)

    plt.show()

# Ordinary least-squares linear regression.
def linear_regression():
    """Fit plain OLS on the module-level train split.

    Plots the fitted line over the training data, then returns the
    metrics dict consumed by show_metrics().
    """
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Extract true scalar slope/intercept once. The original int() cast
    # truncated the fractional part (w=20.8 -> 20) and relies on the
    # deprecated ndarray -> int scalar conversion.
    w = float(np.ravel(model.coef_)[0])
    b = float(np.ravel(model.intercept_)[0])

    show_data(x_train, y_train, w, b, title='线性回归-训练集', xlabel='x坐标', ylabel='y坐标')
    mse_train, mae_train, mse_test, mae_test = calc_metrics(model, x_train, x_test, y_train, y_test)

    return {
        'model': 'linear',
        'alphas': '',
        'alpha': '',
        'best_alpha': '',
        'w': w,
        'b': b,
        'mse(训练集)': mse_train,
        'mae(训练集)': mae_train,
        'mse(测试集)': mse_test,
        'mae(测试集)': mae_test,
    }

# Lasso (L1-regularised) regression with a fixed alpha.
def lasso_regression():
    """Fit Lasso with alpha=0.2 on the module-level train split.

    Plots the fitted line over the training data, then returns the
    metrics dict consumed by show_metrics().
    """
    alpha = 0.2

    model = Lasso(alpha=alpha)
    model.fit(x_train, y_train)

    # Scalar slope/intercept without int() truncation (the original cast
    # dropped the fractional part and used a deprecated ndarray -> int
    # conversion).
    w = float(np.ravel(model.coef_)[0])
    b = float(np.ravel(model.intercept_)[0])
    show_data(x_train, y_train, w, b, title='lasso回归-训练集', xlabel='x坐标', ylabel='y坐标')

    mse_train, mae_train, mse_test, mae_test = calc_metrics(model, x_train, x_test, y_train, y_test)

    return {
        'model': 'lasso',
        'alphas': '',
        'alpha': alpha,
        'best_alpha': '',
        'w': w,
        'b': b,
        'mse(训练集)': mse_train,
        'mae(训练集)': mae_train,
        'mse(测试集)': mse_test,
        'mae(测试集)': mae_test,
    }

# Lasso with built-in cross-validated alpha selection.
def lassoCV_regression():
    """Fit LassoCV over a small alpha grid and report the chosen alpha.

    Plots the fitted line over the training data, then returns the
    metrics dict consumed by show_metrics().
    """
    # Start the grid at 0.1: the original range(5) included alpha=0,
    # which is not a valid Lasso penalty (sklearn warns / misbehaves;
    # alpha=0 is plain OLS).
    alphas = [i / 10 for i in range(1, 5)]

    model = LassoCV(alphas=alphas)

    # LassoCV requires a 1-D target, so flatten the (n, 1) column vector.
    model.fit(x_train, y_train.ravel())

    # Scalar slope/intercept without int() truncation.
    w = float(np.ravel(model.coef_)[0])
    b = float(np.ravel(model.intercept_)[0])

    mse_train, mae_train, mse_test, mae_test = calc_metrics(model, x_train, x_test, y_train, y_test)

    show_data(x_train, y_train, w, b, title='lassoCV回归-训练集', xlabel='x坐标', ylabel='y坐标')

    return {
        'model': 'lassoCV',
        'alphas': str(alphas),
        'alpha': '',
        'best_alpha': model.alpha_,
        'w': w,
        'b': b,
        'mse(训练集)': mse_train,
        'mae(训练集)': mae_train,
        'mse(测试集)': mse_test,
        'mae(测试集)': mae_test,
    }

# Ridge (L2-regularised) regression with a fixed alpha.
def ridge_regression():
    """Fit Ridge with alpha=0.3 on the module-level train split.

    Plots the fitted line over the training data, then returns the
    metrics dict consumed by show_metrics().
    """
    alpha = 0.3

    model = Ridge(alpha=alpha)
    model.fit(x_train, y_train)

    # Scalar slope/intercept without int() truncation (the original cast
    # dropped the fractional part and used a deprecated ndarray -> int
    # conversion).
    w = float(np.ravel(model.coef_)[0])
    b = float(np.ravel(model.intercept_)[0])
    show_data(x_train, y_train, w, b, title='ridge回归-训练集', xlabel='x坐标', ylabel='y坐标')

    mse_train, mae_train, mse_test, mae_test = calc_metrics(model, x_train, x_test, y_train, y_test)

    return {
        'model': 'ridge',
        'alphas': '',
        'alpha': alpha,
        'best_alpha': '',
        'w': w,
        'b': b,
        'mse(训练集)': mse_train,
        'mae(训练集)': mae_train,
        'mse(测试集)': mse_test,
        'mae(测试集)': mae_test,
    }

# Ridge with built-in cross-validated alpha selection.
def ridgeCV_regression():
    """Fit RidgeCV over a small alpha grid and report the chosen alpha.

    Plots the fitted line over the training data, then returns the
    metrics dict consumed by show_metrics().
    """
    alphas = [i / 10 for i in range(1, 5)]

    # store_cv_values=True dropped: the stored per-alpha CV values were
    # never read anywhere, and the keyword has been renamed/deprecated
    # in recent scikit-learn releases.
    model = RidgeCV(alphas=alphas)

    model.fit(x_train, y_train)

    # Scalar slope/intercept without int() truncation.
    w = float(np.ravel(model.coef_)[0])
    b = float(np.ravel(model.intercept_)[0])

    mse_train, mae_train, mse_test, mae_test = calc_metrics(model, x_train, x_test, y_train, y_test)

    show_data(x_train, y_train, w, b, title='ridgeCV回归-训练集', xlabel='x坐标', ylabel='y坐标')

    return {
        'model': 'ridgeCV',
        'alphas': str(alphas),
        'alpha': '',
        'best_alpha': model.alpha_,
        'w': w,
        'b': b,
        'mse(训练集)': mse_train,
        'mae(训练集)': mae_train,
        'mse(测试集)': mse_test,
        'mae(测试集)': mae_test,
    }

# Render the collected metric dicts as one ASCII comparison table.
def show_metrics(metrics_info_list):
    """Return an ASCII-table string with one row per metrics dict.

    Missing keys render as "None"; every cell is clipped to 20 chars.
    """
    columns = ["model", 'alphas', 'alpha', 'best_alpha', 'w', 'b',
               'mse(训练集)', 'mae(训练集)', 'mse(测试集)', 'mae(测试集)']

    rows = [["%.20s" % info.get(key, None) for key in columns]
            for info in metrics_info_list]

    return AsciiTable([columns] + rows).table

# Compute regression error metrics on both data splits.
def calc_metrics(model, x_train, x_test, y_train, y_test):
    """Return (mse_train, mae_train, mse_test, mae_test) for a fitted model."""
    def _errors(features, targets):
        # One prediction pass per split, scored with MSE and MAE.
        predictions = model.predict(features)
        return (mean_squared_error(targets, predictions),
                mean_absolute_error(targets, predictions))

    mse_train, mae_train = _errors(x_train, y_train)
    mse_test, mae_test = _errors(x_test, y_test)
    return mse_train, mae_train, mse_test, mae_test

# Run each regression variant; each call plots its training fit and
# returns a metrics dict.
metrics_1 = linear_regression()

metrics_2 = lasso_regression()

metrics_3 = lassoCV_regression()

metrics_4 = ridge_regression()

metrics_5 = ridgeCV_regression()

# Collect all rows and print one ASCII comparison table.
metrics_info_list = [metrics_1, metrics_2, metrics_3, metrics_4, metrics_5]

metric_str = show_metrics(metrics_info_list)

print(metric_str)

打印结果:

指标

图片结果:

线性回归

lasso回归

lassoCV回归

ridge回归

ridgeCV回归
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 205,386评论 6 479
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 87,939评论 2 381
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 151,851评论 0 341
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 54,953评论 1 278
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 63,971评论 5 369
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,784评论 1 283
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 38,126评论 3 399
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,765评论 0 258
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 43,148评论 1 300
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,744评论 2 323
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,858评论 1 333
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,479评论 4 322
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 39,080评论 3 307
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 30,053评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 31,278评论 1 260
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 45,245评论 2 352
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 42,590评论 2 343