PaddlePaddle (飞桨)《百度架构师手把手带你零基础实践深度学习》Notes, Part 1

1. Boston Housing Prices: Linear Regression

On day one we studied the Boston housing price case and wrote our first linear regression model.
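For reference, a brief summary (written in LaTeX, mirroring what the code below computes) of the model and its loss:

z = \sum_{j=1}^{13} w_j x_j + b, \qquad L = \frac{1}{N}\sum_{i=1}^{N} (z_i - y_i)^2

The exact gradient of this loss is \frac{\partial L}{\partial w_j} = \frac{2}{N}\sum_{i=1}^{N} (z_i - y_i)\,x_{ij} and \frac{\partial L}{\partial b} = \frac{2}{N}\sum_{i=1}^{N} (z_i - y_i); the gradient() method below drops the constant factor 2, which is equivalent to rescaling the learning rate eta.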

import numpy as np
import matplotlib.pyplot as plt


def load_data():
    # Load the data from file
    datafile = 'housing.data'
    data = np.fromfile(datafile, sep=' ')

    # Each record has 14 fields: the first 13 are features and the 14th is the median house price (MEDV)
    feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                     'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
    feature_num = len(feature_names)

    # Reshape the flat array into shape [N, 14]
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # Split the dataset into a training set and a test set
    # 80% of the data is used for training and 20% for testing
    # The two sets must not overlap
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]

    # Compute the per-feature max, min and mean of the training set
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                               training_data.sum(axis=0) / training_data.shape[0]

    # Normalize the data
    for i in range(feature_num):
        # print(maximums[i], minimums[i], avgs[i])
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])

    # Split into training and test sets
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data


class Network(object):
    def __init__(self, num_of_weights):
        # Randomly initialize w
        # To keep results consistent across runs, a fixed random seed could be set here
        # np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0.

    def forward(self, x):
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        error = z - y
        num_samples = error.shape[0]
        cost = error * error
        cost = np.sum(cost) / num_samples
        return cost

    def gradient(self, x, y):
        z = self.forward(x)
        N = x.shape[0]
        gradient_w = 1. / N * np.sum((z - y) * x, axis=0)
        gradient_w = gradient_w[:, np.newaxis]
        gradient_b = 1. / N * np.sum(z - y)
        return gradient_w, gradient_b

    def update(self, gradient_w, gradient_b, eta=0.01):
        self.w = self.w - eta * gradient_w
        self.b = self.b - eta * gradient_b

    def train(self, training_data, num_epoches, batch_size=10, eta=0.01):
        n = len(training_data)
        losses = []
        for epoch_id in range(num_epoches):
            # Shuffle the training data before each epoch,
            # then draw batch_size records at a time
            np.random.shuffle(training_data)
            # Split the training data into mini-batches of batch_size records each
            mini_batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
            for iter_id, mini_batch in enumerate(mini_batches):
                # print(self.w.shape)
                # print(self.b)
                x = mini_batch[:, :-1]
                y = mini_batch[:, -1:]
                a = self.forward(x)
                loss = self.loss(a, y)
                gradient_w, gradient_b = self.gradient(x, y)
                self.update(gradient_w, gradient_b, eta)
                losses.append(loss)
                print('Epoch {:3d} / iter {:3d}, loss = {:.4f}'.
                      format(epoch_id, iter_id, loss))

        return losses


# Load the data
train_data, test_data = load_data()

# Create the network
net = Network(13)
# Start training
losses = net.train(train_data, num_epoches=50, batch_size=100, eta=0.1)

# Plot the loss curve
plot_x = np.arange(len(losses))
plot_y = np.array(losses)
plt.plot(plot_x, plot_y)
plt.show()

Program output:

Epoch   0 / iter   0, loss = 0.7378
Epoch   0 / iter   1, loss = 1.0192
Epoch   0 / iter   2, loss = 0.8953
Epoch   0 / iter   3, loss = 0.7187
Epoch   0 / iter   4, loss = 0.3238
Epoch   1 / iter   0, loss = 0.7051
Epoch   1 / iter   1, loss = 0.7861
Epoch   1 / iter   2, loss = 0.6244
Epoch   1 / iter   3, loss = 0.5988
Epoch   1 / iter   4, loss = 0.4458
Epoch   2 / iter   0, loss = 0.6392
Epoch   2 / iter   1, loss = 0.6579
Epoch   2 / iter   2, loss = 0.4498
Epoch   2 / iter   3, loss = 0.5088
Epoch   2 / iter   4, loss = 0.0872
Epoch   3 / iter   0, loss = 0.4390
Epoch   3 / iter   1, loss = 0.4419
Epoch   3 / iter   2, loss = 0.5021
Epoch   3 / iter   3, loss = 0.5257
Epoch   3 / iter   4, loss = 0.3474
Epoch   4 / iter   0, loss = 0.3829
Epoch   4 / iter   1, loss = 0.4803
Epoch   4 / iter   2, loss = 0.4093
Epoch   4 / iter   3, loss = 0.3265
Epoch   4 / iter   4, loss = 0.2397
Epoch   5 / iter   0, loss = 0.2519
Epoch   5 / iter   1, loss = 0.3572
Epoch   5 / iter   2, loss = 0.3713
Epoch   5 / iter   3, loss = 0.3993
Epoch   5 / iter   4, loss = 0.1408
Epoch   6 / iter   0, loss = 0.3139
Epoch   6 / iter   1, loss = 0.3148
Epoch   6 / iter   2, loss = 0.3564
Epoch   6 / iter   3, loss = 0.2193
Epoch   6 / iter   4, loss = 0.7519
Epoch   7 / iter   0, loss = 0.3079
Epoch   7 / iter   1, loss = 0.2213
Epoch   7 / iter   2, loss = 0.2008
Epoch   7 / iter   3, loss = 0.2968
Epoch   7 / iter   4, loss = 0.7452
Epoch   8 / iter   0, loss = 0.2508
Epoch   8 / iter   1, loss = 0.2483
Epoch   8 / iter   2, loss = 0.1941
Epoch   8 / iter   3, loss = 0.2225
Epoch   8 / iter   4, loss = 0.3216
Epoch   9 / iter   0, loss = 0.1601
Epoch   9 / iter   1, loss = 0.2106
Epoch   9 / iter   2, loss = 0.1715
Epoch   9 / iter   3, loss = 0.2837
Epoch   9 / iter   4, loss = 0.3635
Epoch  10 / iter   0, loss = 0.1921
Epoch  10 / iter   1, loss = 0.1978
Epoch  10 / iter   2, loss = 0.2064
Epoch  10 / iter   3, loss = 0.1725
Epoch  10 / iter   4, loss = 0.1110
Epoch  11 / iter   0, loss = 0.1593
Epoch  11 / iter   1, loss = 0.2419
Epoch  11 / iter   2, loss = 0.1493
Epoch  11 / iter   3, loss = 0.1792
Epoch  11 / iter   4, loss = 0.1006
Epoch  12 / iter   0, loss = 0.2096
Epoch  12 / iter   1, loss = 0.1531
Epoch  12 / iter   2, loss = 0.1776
Epoch  12 / iter   3, loss = 0.1619
Epoch  12 / iter   4, loss = 0.0634
Epoch  13 / iter   0, loss = 0.2140
Epoch  13 / iter   1, loss = 0.1404
Epoch  13 / iter   2, loss = 0.1582
Epoch  13 / iter   3, loss = 0.1638
Epoch  13 / iter   4, loss = 0.1224
Epoch  14 / iter   0, loss = 0.2183
Epoch  14 / iter   1, loss = 0.1221
Epoch  14 / iter   2, loss = 0.1231
Epoch  14 / iter   3, loss = 0.1846
Epoch  14 / iter   4, loss = 0.0570
Epoch  15 / iter   0, loss = 0.1784
Epoch  15 / iter   1, loss = 0.1674
Epoch  15 / iter   2, loss = 0.1302
Epoch  15 / iter   3, loss = 0.1503
Epoch  15 / iter   4, loss = 0.0683
Epoch  16 / iter   0, loss = 0.1257
Epoch  16 / iter   1, loss = 0.1525
Epoch  16 / iter   2, loss = 0.1778
Epoch  16 / iter   3, loss = 0.1491
Epoch  16 / iter   4, loss = 0.0715
Epoch  17 / iter   0, loss = 0.1233
Epoch  17 / iter   1, loss = 0.1627
Epoch  17 / iter   2, loss = 0.1168
Epoch  17 / iter   3, loss = 0.1766
Epoch  17 / iter   4, loss = 0.1477
Epoch  18 / iter   0, loss = 0.1688
Epoch  18 / iter   1, loss = 0.0969
Epoch  18 / iter   2, loss = 0.1238
Epoch  18 / iter   3, loss = 0.1742
Epoch  18 / iter   4, loss = 0.0897
Epoch  19 / iter   0, loss = 0.1051
Epoch  19 / iter   1, loss = 0.1647
Epoch  19 / iter   2, loss = 0.1124
Epoch  19 / iter   3, loss = 0.1584
Epoch  19 / iter   4, loss = 0.3269
Epoch  20 / iter   0, loss = 0.1371
Epoch  20 / iter   1, loss = 0.1291
Epoch  20 / iter   2, loss = 0.1216
Epoch  20 / iter   3, loss = 0.1345
Epoch  20 / iter   4, loss = 0.2645
Epoch  21 / iter   0, loss = 0.1201
Epoch  21 / iter   1, loss = 0.1283
Epoch  21 / iter   2, loss = 0.1595
Epoch  21 / iter   3, loss = 0.1137
Epoch  21 / iter   4, loss = 0.0603
Epoch  22 / iter   0, loss = 0.1297
Epoch  22 / iter   1, loss = 0.1429
Epoch  22 / iter   2, loss = 0.1249
Epoch  22 / iter   3, loss = 0.1063
Epoch  22 / iter   4, loss = 0.2421
Epoch  23 / iter   0, loss = 0.1291
Epoch  23 / iter   1, loss = 0.1494
Epoch  23 / iter   2, loss = 0.1028
Epoch  23 / iter   3, loss = 0.1186
Epoch  23 / iter   4, loss = 0.1414
Epoch  24 / iter   0, loss = 0.1054
Epoch  24 / iter   1, loss = 0.1349
Epoch  24 / iter   2, loss = 0.1255
Epoch  24 / iter   3, loss = 0.1103
Epoch  24 / iter   4, loss = 0.0228
Epoch  25 / iter   0, loss = 0.1000
Epoch  25 / iter   1, loss = 0.1036
Epoch  25 / iter   2, loss = 0.1254
Epoch  25 / iter   3, loss = 0.1326
Epoch  25 / iter   4, loss = 0.1659
Epoch  26 / iter   0, loss = 0.1149
Epoch  26 / iter   1, loss = 0.0941
Epoch  26 / iter   2, loss = 0.1013
Epoch  26 / iter   3, loss = 0.1349
Epoch  26 / iter   4, loss = 0.2580
Epoch  27 / iter   0, loss = 0.0810
Epoch  27 / iter   1, loss = 0.1173
Epoch  27 / iter   2, loss = 0.1268
Epoch  27 / iter   3, loss = 0.1106
Epoch  27 / iter   4, loss = 0.1656
Epoch  28 / iter   0, loss = 0.0983
Epoch  28 / iter   1, loss = 0.1059
Epoch  28 / iter   2, loss = 0.1063
Epoch  28 / iter   3, loss = 0.1162
Epoch  28 / iter   4, loss = 0.0697
Epoch  29 / iter   0, loss = 0.1205
Epoch  29 / iter   1, loss = 0.1385
Epoch  29 / iter   2, loss = 0.0898
Epoch  29 / iter   3, loss = 0.0668
Epoch  29 / iter   4, loss = 0.1555
Epoch  30 / iter   0, loss = 0.0904
Epoch  30 / iter   1, loss = 0.1028
Epoch  30 / iter   2, loss = 0.1114
Epoch  30 / iter   3, loss = 0.1031
Epoch  30 / iter   4, loss = 0.1983
Epoch  31 / iter   0, loss = 0.1194
Epoch  31 / iter   1, loss = 0.0879
Epoch  31 / iter   2, loss = 0.0903
Epoch  31 / iter   3, loss = 0.1049
Epoch  31 / iter   4, loss = 0.1172
Epoch  32 / iter   0, loss = 0.1221
Epoch  32 / iter   1, loss = 0.1013
Epoch  32 / iter   2, loss = 0.1003
Epoch  32 / iter   3, loss = 0.0776
Epoch  32 / iter   4, loss = 0.0643
Epoch  33 / iter   0, loss = 0.0903
Epoch  33 / iter   1, loss = 0.0977
Epoch  33 / iter   2, loss = 0.0780
Epoch  33 / iter   3, loss = 0.1241
Epoch  33 / iter   4, loss = 0.0923
Epoch  34 / iter   0, loss = 0.1095
Epoch  34 / iter   1, loss = 0.0980
Epoch  34 / iter   2, loss = 0.0720
Epoch  34 / iter   3, loss = 0.0987
Epoch  34 / iter   4, loss = 0.1598
Epoch  35 / iter   0, loss = 0.1284
Epoch  35 / iter   1, loss = 0.0898
Epoch  35 / iter   2, loss = 0.0942
Epoch  35 / iter   3, loss = 0.0673
Epoch  35 / iter   4, loss = 0.0563
Epoch  36 / iter   0, loss = 0.0968
Epoch  36 / iter   1, loss = 0.0851
Epoch  36 / iter   2, loss = 0.0861
Epoch  36 / iter   3, loss = 0.0995
Epoch  36 / iter   4, loss = 0.1689
Epoch  37 / iter   0, loss = 0.1013
Epoch  37 / iter   1, loss = 0.0946
Epoch  37 / iter   2, loss = 0.0851
Epoch  37 / iter   3, loss = 0.0825
Epoch  37 / iter   4, loss = 0.0630
Epoch  38 / iter   0, loss = 0.0994
Epoch  38 / iter   1, loss = 0.1142
Epoch  38 / iter   2, loss = 0.0805
Epoch  38 / iter   3, loss = 0.0641
Epoch  38 / iter   4, loss = 0.0974
Epoch  39 / iter   0, loss = 0.0990
Epoch  39 / iter   1, loss = 0.0841
Epoch  39 / iter   2, loss = 0.0884
Epoch  39 / iter   3, loss = 0.0829
Epoch  39 / iter   4, loss = 0.0577
Epoch  40 / iter   0, loss = 0.0895
Epoch  40 / iter   1, loss = 0.0945
Epoch  40 / iter   2, loss = 0.0753
Epoch  40 / iter   3, loss = 0.0919
Epoch  40 / iter   4, loss = 0.0385
Epoch  41 / iter   0, loss = 0.0921
Epoch  41 / iter   1, loss = 0.0930
Epoch  41 / iter   2, loss = 0.0801
Epoch  41 / iter   3, loss = 0.0794
Epoch  41 / iter   4, loss = 0.0722
Epoch  42 / iter   0, loss = 0.0842
Epoch  42 / iter   1, loss = 0.1064
Epoch  42 / iter   2, loss = 0.0851
Epoch  42 / iter   3, loss = 0.0646
Epoch  42 / iter   4, loss = 0.0488
Epoch  43 / iter   0, loss = 0.0758
Epoch  43 / iter   1, loss = 0.0970
Epoch  43 / iter   2, loss = 0.0901
Epoch  43 / iter   3, loss = 0.0722
Epoch  43 / iter   4, loss = 0.0732
Epoch  44 / iter   0, loss = 0.0845
Epoch  44 / iter   1, loss = 0.0646
Epoch  44 / iter   2, loss = 0.0923
Epoch  44 / iter   3, loss = 0.0814
Epoch  44 / iter   4, loss = 0.2753
Epoch  45 / iter   0, loss = 0.0891
Epoch  45 / iter   1, loss = 0.0690
Epoch  45 / iter   2, loss = 0.0776
Epoch  45 / iter   3, loss = 0.0923
Epoch  45 / iter   4, loss = 0.0258
Epoch  46 / iter   0, loss = 0.1082
Epoch  46 / iter   1, loss = 0.0621
Epoch  46 / iter   2, loss = 0.0781
Epoch  46 / iter   3, loss = 0.0718
Epoch  46 / iter   4, loss = 0.0942
Epoch  47 / iter   0, loss = 0.0621
Epoch  47 / iter   1, loss = 0.0870
Epoch  47 / iter   2, loss = 0.0802
Epoch  47 / iter   3, loss = 0.0882
Epoch  47 / iter   4, loss = 0.0273
Epoch  48 / iter   0, loss = 0.0754
Epoch  48 / iter   1, loss = 0.0653
Epoch  48 / iter   2, loss = 0.0695
Epoch  48 / iter   3, loss = 0.1032
Epoch  48 / iter   4, loss = 0.0291
Epoch  49 / iter   0, loss = 0.0999
Epoch  49 / iter   1, loss = 0.0713
Epoch  49 / iter   2, loss = 0.0691
Epoch  49 / iter   3, loss = 0.0691
Epoch  49 / iter   4, loss = 0.0507

The loss curve over training is shown in the figure below:


[Figure: loss curve (损失函数的变化趋势1.png)]
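As a quick extra check (not part of the program above), the trained network can also be evaluated on the 20% test split returned by load_data(); a minimal sketch reusing net, test_data and the loss() method defined above:

# Evaluate the trained network on the held-out test set
x_test = test_data[:, :-1]
y_test = test_data[:, -1:]
test_loss = net.loss(net.forward(x_test), y_test)
print('test set loss = {:.4f}'.format(test_loss))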

Takeaways

1. NumPy's broadcasting makes vector and matrix operations very convenient and fast (see the short sketch after this list).
2. Data normalization: each feature is normalized so that its values are scaled roughly into [-0.5, 0.5]. This has two benefits: training is more efficient, and the magnitude of each feature's weight then reflects how much that variable contributes to the prediction (since all features share the same value range).
3. Stochastic Gradient Descent (SGD); the core concepts are:

mini-batch: the batch of data drawn at each iteration is called a mini-batch.
batch_size: the number of samples contained in one mini-batch is called the batch_size.
epoch: samples are drawn mini-batch by mini-batch; once the whole dataset has been traversed, one round of training, i.e. one epoch, is complete. When launching training, the number of epochs (num_epoches) and the batch_size can be passed in as parameters.
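As a small illustration of point 1 (a sketch with toy data, not taken from the program above), the per-feature normalization loop in load_data() could be written as a single broadcast expression:

import numpy as np

# Toy stand-in for the (N, 14) housing array
data = np.random.rand(5, 14)
maximums = data.max(axis=0)   # shape (14,)
minimums = data.min(axis=0)   # shape (14,)
avgs = data.mean(axis=0)      # shape (14,)
# The (5, 14) array broadcasts against the (14,) vectors,
# so no explicit per-column loop is needed
normalized = (data - avgs) / (maximums - minimums)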

2. Rebuilding the Program with the PaddlePaddle (飞桨) Framework

# Load PaddlePaddle, NumPy and related libraries
import paddle
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph import Linear
import numpy as np
import matplotlib.pyplot as plt
import os
import random

def load_data():
    # Load the data from file
    datafile = 'housing.data'
    data = np.fromfile(datafile, sep=' ')

    # Each record has 14 fields: the first 13 are features and the 14th is the median house price (MEDV)
    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_num = len(feature_names)

    # Reshape the flat array into shape [N, 14]
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # Split the dataset into a training set and a test set
    # 80% of the data is used for training and 20% for testing
    # The two sets must not overlap
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]

    # Compute the per-feature max, min and mean of the training set
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                                 training_data.sum(axis=0) / training_data.shape[0]
    
    # Record the normalization parameters so the same normalization can be applied at inference time
    global max_values
    global min_values
    global avg_values
    max_values = maximums
    min_values = minimums
    avg_values = avgs
    # Each of these is a length-14 vector

    # Normalize the data
    for i in range(feature_num):
        #print(maximums[i], minimums[i], avgs[i])
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])

    # Split into training and test sets
    #ratio = 0.8
    #offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data

def load_one_example(data_dir):
    f = open(data_dir, 'r')
    datas = f.readlines()
    # Use the 10th record from the end for testing
    tmp = datas[-10]
    tmp = tmp.strip().split()
    one_data = [float(v) for v in tmp]


    # Normalize the sample
    for i in range(len(one_data)-1):
        one_data[i] = (one_data[i] - avg_values[i]) / (max_values[i] - min_values[i])

    data = np.reshape(np.array(one_data[:-1]), [1, -1]).astype(np.float32)
    label = one_data[-1]
    return data, label

class Regressor(fluid.dygraph.Layer):
    def __init__(self):
        super(Regressor, self).__init__()
        
        # Define one fully connected layer with output dimension 1 and no activation function
        self.fc = Linear(input_dim=13, output_dim=1, act=None)
    
    # Forward computation of the network
    def forward(self, inputs):
        x = self.fc(inputs)
        return x

# Set up the PaddlePaddle dynamic-graph (dygraph) context
with fluid.dygraph.guard():
    # Instantiate the linear regression model defined above
    model = Regressor()
    # Switch the model to training mode
    model.train()
    # Load the data
    training_data, test_data = load_data()
    # Define the optimizer: stochastic gradient descent (SGD)
    # with a learning rate of 0.01
    opt = fluid.optimizer.SGD(learning_rate=0.01, parameter_list=model.parameters())

with dygraph.guard(fluid.CPUPlace()):
    EPOCH_NUM = 10   # number of epochs (outer loop)
    BATCH_SIZE = 10  # batch size
    losses = []

    # Outer loop over epochs
    for epoch_id in range(EPOCH_NUM):
        # Shuffle the training data before each epoch
        np.random.shuffle(training_data)
        # Split the training data into batches of BATCH_SIZE records
        mini_batches = [training_data[k:k+BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)]
        # Inner loop over mini-batches
        for iter_id, mini_batch in enumerate(mini_batches):
            x = np.array(mini_batch[:, :-1]).astype('float32') # features of the current batch
            y = np.array(mini_batch[:, -1:]).astype('float32') # labels of the current batch (actual house prices)
            # Convert the numpy data into dygraph Variables
            house_features = dygraph.to_variable(x)
            prices = dygraph.to_variable(y)
            
            # Forward pass
            predicts = model(house_features)
            
            # Compute the loss
            loss = fluid.layers.square_error_cost(predicts, label=prices)
            avg_loss = fluid.layers.mean(loss)
            # Record the loss
            losses.append(avg_loss.numpy()[0])
            if iter_id%20==0:
                print("epoch: {}, iter: {}, loss is: {}".format(epoch_id, iter_id, avg_loss.numpy()))
            # Backpropagation
            avg_loss.backward()
            # Minimize the loss and update the parameters
            opt.minimize(avg_loss)
            # Clear the gradients
            model.clear_gradients()
    # Save the model
    fluid.save_dygraph(model.state_dict(), 'LR_model')
    # Plot the loss curve
    plot_x = np.arange(len(losses))
    plot_y = np.array(losses)
    plt.plot(plot_x, plot_y)
    plt.show()

# Set up the dygraph context
with fluid.dygraph.guard():
    # Save the model parameters to a file named LR_model
    fluid.save_dygraph(model.state_dict(), 'LR_model')
    print("模型保存成功,模型参数保存在LR_model中")


with dygraph.guard():
    # The argument is the path of the saved model parameters
    model_dict, _ = fluid.load_dygraph('LR_model')
    model.load_dict(model_dict)
    model.eval()

    # The argument is the path of the dataset file
    test_data, label = load_one_example('./work/housing.data')
    # Convert the data into a dygraph Variable
    test_data = dygraph.to_variable(test_data)
    results = model(test_data)

    # De-normalize the prediction
    results = results * (max_values[-1] - min_values[-1]) + avg_values[-1]
    print("Inference result is {}, the corresponding label is {}".format(results.numpy(), label))

The program above only uses the 10th sample from the end for testing, so I made a few modifications: I added a plot of the loss curve and used the 20% of the data held out as the test set to evaluate the model.

# Load PaddlePaddle, NumPy and related libraries
import paddle
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph import Linear
import numpy as np
import matplotlib.pyplot as plt
import os
import random

def load_data():
    # Load the data from file
    datafile = 'housing.data'
    data = np.fromfile(datafile, sep=' ')

    # Each record has 14 fields: the first 13 are features and the 14th is the median house price (MEDV)
    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_num = len(feature_names)

    # Reshape the flat array into shape [N, 14]
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # Split the dataset into a training set and a test set
    # 80% of the data is used for training and 20% for testing
    # The two sets must not overlap
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]

    # Compute the per-feature max, min and mean of the training set
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                                 training_data.sum(axis=0) / training_data.shape[0]
    
    # Record the normalization parameters so the same normalization can be applied at inference time
    global max_values
    global min_values
    global avg_values
    max_values = maximums
    min_values = minimums
    avg_values = avgs
    # Each of these is a length-14 vector

    # Normalize the data
    for i in range(feature_num):
        #print(maximums[i], minimums[i], avgs[i])
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])

    # Split into training and test sets
    #ratio = 0.8
    #offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data

class Regressor(fluid.dygraph.Layer):
    def __init__(self):
        super(Regressor, self).__init__()
        
        # Define one fully connected layer with output dimension 1 and no activation function
        self.fc = Linear(input_dim=13, output_dim=1, act=None)
    
    # Forward computation of the network
    def forward(self, inputs):
        x = self.fc(inputs)
        return x

# Set up the PaddlePaddle dynamic-graph (dygraph) context
with fluid.dygraph.guard():
    # Instantiate the linear regression model defined above
    model = Regressor()
    # Switch the model to training mode
    model.train()
    # Load the data
    training_data, test_data = load_data()
    # Define the optimizer: stochastic gradient descent (SGD)
    # with a learning rate of 0.01
    opt = fluid.optimizer.SGD(learning_rate=0.01, parameter_list=model.parameters())

with dygraph.guard(fluid.CPUPlace()):
    EPOCH_NUM = 10   # number of epochs (outer loop)
    BATCH_SIZE = 10  # batch size
    losses = []

    # Outer loop over epochs
    for epoch_id in range(EPOCH_NUM):
        # Shuffle the training data before each epoch
        np.random.shuffle(training_data)
        # Split the training data into batches of BATCH_SIZE records
        mini_batches = [training_data[k:k+BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)]
        # Inner loop over mini-batches
        for iter_id, mini_batch in enumerate(mini_batches):
            x = np.array(mini_batch[:, :-1]).astype('float32') # features of the current batch
            y = np.array(mini_batch[:, -1:]).astype('float32') # labels of the current batch (actual house prices)
            # Convert the numpy data into dygraph Variables
            house_features = dygraph.to_variable(x)
            prices = dygraph.to_variable(y)
            
            # Forward pass
            predicts = model(house_features)
            
            # Compute the loss
            loss = fluid.layers.square_error_cost(predicts, label=prices)
            avg_loss = fluid.layers.mean(loss)
            # Record the loss
            losses.append(avg_loss.numpy()[0])
            if iter_id%20==0:
                print("epoch: {}, iter: {}, loss is: {}".format(epoch_id, iter_id, avg_loss.numpy()))
            # Backpropagation
            avg_loss.backward()
            # Minimize the loss and update the parameters
            opt.minimize(avg_loss)
            # Clear the gradients
            model.clear_gradients()
    # Save the model
    fluid.save_dygraph(model.state_dict(), 'LR_model')
    # Plot the loss curve
    plot_x = np.arange(len(losses))
    plot_y = np.array(losses)
    plt.plot(plot_x, plot_y)
    plt.show()

# Set up the dygraph context
with fluid.dygraph.guard():
    # Save the model parameters to a file named LR_model
    fluid.save_dygraph(model.state_dict(), 'LR_model')
    print("模型保存成功,模型参数保存在LR_model中")


with dygraph.guard():
    # The argument is the path of the saved model parameters
    model_dict, _ = fluid.load_dygraph('LR_model')
    model.load_dict(model_dict)
    model.eval()

    x = np.array(test_data[:, :-1]).astype('float32') # features of the test set
    y = np.array(test_data[:, -1:]).astype('float32') # labels of the test set (actual house prices)
    # Convert the numpy data into dygraph Variables
    house_features = dygraph.to_variable(x)
    # prices = dygraph.to_variable(y)

    # Forward pass
    results = model(house_features)
    results = results * (max_values[-1] - min_values[-1]) + avg_values[-1]
    label = y * (max_values[-1] - min_values[-1]) + avg_values[-1]
    for i in range(len(y)):
        print("Inference result is {}, the corresponding label is {}".format(results.numpy()[i], label[i]))

[Figure: loss curve (损失函数趋势图2.png)]
Inference result is [23.46879], the corresponding label is [8.5]
Inference result is [11.9041605], the corresponding label is [5.]
Inference result is [22.795517], the corresponding label is [11.900001]
Inference result is [25.68421], the corresponding label is [27.9]
Inference result is [29.061146], the corresponding label is [17.2]
Inference result is [33.748688], the corresponding label is [27.5]
Inference result is [32.948475], the corresponding label is [15.000001]
Inference result is [41.335762], the corresponding label is [17.2]
Inference result is [41.459164], the corresponding label is [17.9]
Inference result is [28.011595], the corresponding label is [16.3]
Inference result is [33.32433], the corresponding label is [7.]
Inference result is [42.055595], the corresponding label is [7.200001]
Inference result is [45.29074], the corresponding label is [7.5]
Inference result is [36.447838], the corresponding label is [10.400001]
Inference result is [28.811592], the corresponding label is [8.8]
Inference result is [46.1286], the corresponding label is [8.400001]
Inference result is [26.914522], the corresponding label is [16.7]
Inference result is [28.464455], the corresponding label is [14.2]
Inference result is [28.660559], the corresponding label is [20.800001]
Inference result is [46.54187], the corresponding label is [13.400001]
Inference result is [47.16037], the corresponding label is [11.700001]
Inference result is [43.37393], the corresponding label is [8.3]
Inference result is [47.180077], the corresponding label is [10.200001]
Inference result is [39.601448], the corresponding label is [10.900001]
Inference result is [43.32173], the corresponding label is [11.]
Inference result is [42.127296], the corresponding label is [9.5]
Inference result is [40.854256], the corresponding label is [14.500001]
Inference result is [39.635536], the corresponding label is [14.1]
Inference result is [41.5254], the corresponding label is [16.1]
Inference result is [40.946266], the corresponding label is [14.3]
Inference result is [37.743042], the corresponding label is [11.700001]
Inference result is [39.644344], the corresponding label is [13.400001]
Inference result is [42.825386], the corresponding label is [9.600001]
Inference result is [43.60471], the corresponding label is [8.700001]
Inference result is [44.005264], the corresponding label is [8.400001]
Inference result is [25.288311], the corresponding label is [12.800001]
Inference result is [22.868505], the corresponding label is [10.5]
Inference result is [24.687489], the corresponding label is [17.1]
Inference result is [24.240313], the corresponding label is [18.4]
Inference result is [24.199179], the corresponding label is [15.400001]
Inference result is [32.264908], the corresponding label is [10.8]
Inference result is [43.4346], the corresponding label is [11.8]
Inference result is [28.851978], the corresponding label is [14.900001]
Inference result is [23.909576], the corresponding label is [12.600001]
Inference result is [23.095879], the corresponding label is [14.1]
Inference result is [28.766623], the corresponding label is [13.000001]
Inference result is [45.35603], the corresponding label is [13.400001]
Inference result is [26.211393], the corresponding label is [15.2]
Inference result is [25.60134], the corresponding label is [16.1]
Inference result is [24.343767], the corresponding label is [17.800001]
Inference result is [44.01912], the corresponding label is [14.900001]
Inference result is [44.03011], the corresponding label is [14.1]
Inference result is [45.5551], the corresponding label is [12.700001]
Inference result is [45.796387], the corresponding label is [13.500001]
Inference result is [31.424139], the corresponding label is [14.900001]
Inference result is [24.853563], the corresponding label is [20.]
Inference result is [32.392868], the corresponding label is [16.400002]
Inference result is [25.520857], the corresponding label is [17.7]
Inference result is [25.102945], the corresponding label is [19.5]
Inference result is [23.742544], the corresponding label is [20.2]
Inference result is [26.780102], the corresponding label is [21.400002]
Inference result is [33.781395], the corresponding label is [19.900002]
Inference result is [44.426804], the corresponding label is [19.]
Inference result is [27.025349], the corresponding label is [19.1]
Inference result is [25.477282], the corresponding label is [19.1]
Inference result is [26.615618], the corresponding label is [20.1]
Inference result is [23.973948], the corresponding label is [19.900002]
Inference result is [21.988102], the corresponding label is [19.6]
Inference result is [25.950409], the corresponding label is [23.2]
Inference result is [28.721003], the corresponding label is [29.800001]
Inference result is [24.449879], the corresponding label is [13.8]
Inference result is [28.55926], the corresponding label is [13.3]
Inference result is [24.220623], the corresponding label is [16.7]
Inference result is [24.39872], the corresponding label is [12.]
Inference result is [23.342709], the corresponding label is [14.6]
Inference result is [23.42834], the corresponding label is [21.400002]
Inference result is [25.13335], the corresponding label is [23.]
Inference result is [23.576199], the corresponding label is [23.7]
Inference result is [23.001907], the corresponding label is [25.]
Inference result is [28.83557], the corresponding label is [21.800001]
Inference result is [31.343594], the corresponding label is [20.6]
Inference result is [27.788128], the corresponding label is [21.2]
Inference result is [23.594887], the corresponding label is [19.1]
Inference result is [28.645176], the corresponding label is [20.6]
Inference result is [14.703245], the corresponding label is [15.2]
Inference result is [17.27957], the corresponding label is [7.]
Inference result is [19.182281], the corresponding label is [8.1]
Inference result is [14.079249], the corresponding label is [13.6]
Inference result is [15.304695], the corresponding label is [20.1]
Inference result is [25.364271], the corresponding label is [21.800001]
Inference result is [27.597918], the corresponding label is [24.5]
Inference result is [29.918457], the corresponding label is [23.1]
Inference result is [22.525549], the corresponding label is [19.7]
Inference result is [21.98687], the corresponding label is [18.300001]
Inference result is [23.629286], the corresponding label is [21.2]
Inference result is [22.417788], the corresponding label is [17.5]
Inference result is [21.218828], the corresponding label is [16.8]
Inference result is [17.28513], the corresponding label is [22.400002]
Inference result is [15.7654705], the corresponding label is [20.6]
Inference result is [13.413251], the corresponding label is [23.9]
Inference result is [13.544857], the corresponding label is [22.]
Inference result is [14.505701], the corresponding label is [11.900001]

The loss on some samples is still quite large, and the loss curve fluctuates noticeably.
I changed the training settings to EPOCH_NUM = 30 and BATCH_SIZE = 10.
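The only change to the training cell is the two constants already defined above (a sketch):

    EPOCH_NUM = 30   # was 10
    BATCH_SIZE = 10  # unchanged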


[Figure: loss curve (损失函数变化趋势图3.png)]
Inference result is [19.16061], the corresponding label is [8.5]
Inference result is [22.257748], the corresponding label is [5.]
Inference result is [16.415882], the corresponding label is [11.900001]
Inference result is [19.12475], the corresponding label is [27.9]
Inference result is [18.044804], the corresponding label is [17.2]
Inference result is [23.062931], the corresponding label is [27.5]
Inference result is [31.414034], the corresponding label is [15.000001]
Inference result is [24.794533], the corresponding label is [17.2]
Inference result is [21.538464], the corresponding label is [17.9]
Inference result is [23.17853], the corresponding label is [16.3]
Inference result is [20.514303], the corresponding label is [7.]
Inference result is [21.33069], the corresponding label is [7.200001]
Inference result is [21.542526], the corresponding label is [7.5]
Inference result is [20.62424], the corresponding label is [10.400001]
Inference result is [29.712818], the corresponding label is [8.8]
Inference result is [20.930944], the corresponding label is [8.400001]
Inference result is [17.59581], the corresponding label is [16.7]
Inference result is [16.747923], the corresponding label is [14.2]
Inference result is [21.217716], the corresponding label is [20.800001]
Inference result is [23.539066], the corresponding label is [13.400001]
Inference result is [25.752272], the corresponding label is [11.700001]
Inference result is [21.991816], the corresponding label is [8.3]
Inference result is [26.51014], the corresponding label is [10.200001]
Inference result is [26.954044], the corresponding label is [10.900001]
Inference result is [20.668505], the corresponding label is [11.]
Inference result is [20.789043], the corresponding label is [9.5]
Inference result is [24.6804], the corresponding label is [14.500001]
Inference result is [24.709906], the corresponding label is [14.1]
Inference result is [25.601355], the corresponding label is [16.1]
Inference result is [20.507002], the corresponding label is [14.3]
Inference result is [21.430033], the corresponding label is [11.700001]
Inference result is [18.807175], the corresponding label is [13.400001]
Inference result is [21.33551], the corresponding label is [9.600001]
Inference result is [19.707918], the corresponding label is [8.700001]
Inference result is [17.18261], the corresponding label is [8.400001]
Inference result is [13.55842], the corresponding label is [12.800001]
Inference result is [15.544412], the corresponding label is [10.5]
Inference result is [14.980553], the corresponding label is [17.1]
Inference result is [14.699271], the corresponding label is [18.4]
Inference result is [15.10469], the corresponding label is [15.400001]
Inference result is [16.296833], the corresponding label is [10.8]
Inference result is [19.446524], the corresponding label is [11.8]
Inference result is [15.883473], the corresponding label is [14.900001]
Inference result is [15.515961], the corresponding label is [12.600001]
Inference result is [15.753342], the corresponding label is [14.1]
Inference result is [16.83473], the corresponding label is [13.000001]
Inference result is [22.049944], the corresponding label is [13.400001]
Inference result is [16.336561], the corresponding label is [15.2]
Inference result is [15.805071], the corresponding label is [16.1]
Inference result is [17.090681], the corresponding label is [17.800001]
Inference result is [22.074022], the corresponding label is [14.900001]
Inference result is [20.900843], the corresponding label is [14.1]
Inference result is [20.975655], the corresponding label is [12.700001]
Inference result is [22.099005], the corresponding label is [13.500001]
Inference result is [18.327238], the corresponding label is [14.900001]
Inference result is [16.442745], the corresponding label is [20.]
Inference result is [18.279144], the corresponding label is [16.400002]
Inference result is [16.227621], the corresponding label is [17.7]
Inference result is [16.755377], the corresponding label is [19.5]
Inference result is [17.453497], the corresponding label is [20.2]
Inference result is [19.145613], the corresponding label is [21.400002]
Inference result is [19.356253], the corresponding label is [19.900002]
Inference result is [22.969572], the corresponding label is [19.]
Inference result is [19.506191], the corresponding label is [19.1]
Inference result is [21.565527], the corresponding label is [19.1]
Inference result is [21.56257], the corresponding label is [20.1]
Inference result is [20.077593], the corresponding label is [19.900002]
Inference result is [22.145761], the corresponding label is [19.6]
Inference result is [20.706675], the corresponding label is [23.2]
Inference result is [20.985466], the corresponding label is [29.800001]
Inference result is [19.808105], the corresponding label is [13.8]
Inference result is [19.584246], the corresponding label is [13.3]
Inference result is [18.44503], the corresponding label is [16.7]
Inference result is [18.256538], the corresponding label is [12.]
Inference result is [19.182587], the corresponding label is [14.6]
Inference result is [20.70591], the corresponding label is [21.400002]
Inference result is [23.339006], the corresponding label is [23.]
Inference result is [24.103197], the corresponding label is [23.7]
Inference result is [24.418491], the corresponding label is [25.]
Inference result is [23.418371], the corresponding label is [21.800001]
Inference result is [21.484959], the corresponding label is [20.6]
Inference result is [22.13229], the corresponding label is [21.2]
Inference result is [20.684986], the corresponding label is [19.1]
Inference result is [21.5238], the corresponding label is [20.6]
Inference result is [27.038603], the corresponding label is [15.2]
Inference result is [26.51231], the corresponding label is [7.]
Inference result is [25.597708], the corresponding label is [8.1]
Inference result is [27.377956], the corresponding label is [13.6]
Inference result is [28.548033], the corresponding label is [20.1]
Inference result is [19.40799], the corresponding label is [21.800001]
Inference result is [19.465754], the corresponding label is [24.5]
Inference result is [18.937977], the corresponding label is [23.1]
Inference result is [17.23689], the corresponding label is [19.7]
Inference result is [18.954657], the corresponding label is [18.300001]
Inference result is [19.263266], the corresponding label is [21.2]
Inference result is [18.391119], the corresponding label is [17.5]
Inference result is [18.768625], the corresponding label is [16.8]
Inference result is [19.263432], the corresponding label is [22.400002]
Inference result is [18.777313], the corresponding label is [20.6]
Inference result is [19.743137], the corresponding label is [23.9]
Inference result is [19.618513], the corresponding label is [22.]
Inference result is [18.951277], the corresponding label is [11.900001]

As the number of training epochs increases, the loss keeps trending downward.

In eval mode the model only runs forward computation; it feels like there should also be an evaluation metric to quantify how good the model is.
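A minimal sketch of such a metric, computed with plain numpy from the results and label values produced in the test code above (mse and mae here are ordinary numpy reductions, not Paddle APIs):

# Quantify the test-set error on the de-normalized predictions and labels
pred = results.numpy()
mse = np.mean((pred - label) ** 2)   # mean squared error
mae = np.mean(np.abs(pred - label))  # mean absolute error
print("test MSE = {:.4f}, test MAE = {:.4f}".format(mse, mae))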

The program has a small flaw: the label values are normalized and then de-normalized again, which introduces small numerical errors for some samples (this is why some printed labels end in values like 11.900001).
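One way to avoid that round trip (a sketch, not the course's code: it assumes load_data() is extended with an extra hypothetical global, raw_test_labels, alongside max_values / min_values / avg_values):

    # Inside load_data(), before the normalization loop:
    global raw_test_labels
    raw_test_labels = data[offset:, -1:].copy()  # raw MEDV values of the test split

At inference time, printing raw_test_labels[i] instead of the de-normalized label[i] then reports the original prices exactly.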
