Recommendation Series 2: DCN (Deep & Cross Network)

# TensorFlow 1.x graph/session style API (tf.placeholder, tf.contrib)
import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score


class DCN(BaseEstimator, TransformerMixin):

    def __init__(self, cate_feature_size, cate_field_size, num_feature_size,
                 embedding_size=8,
                 deep_each_layer_nums=[32, 32], 
                 deep_drop_train_prob=[0.5, 0.5, 0.5],
                 cross_layer_num=3,
                 deep_layers_activation=tf.nn.relu,
                 epoches=10, 
                 batch_size=256,
                 learning_rate=0.001, 
                 optimizer_type="adam",
                 verbose=False, 
                 random_seed=2019,
                 loss_type="logloss", 
                 eval_metric=roc_auc_score,
                 l1_reg=0.0,
                 l2_reg=0.0,):
        assert loss_type in ["logloss", "mse"], \
            "loss_type can be either 'logloss' for classification task or 'mse' for regression task"

        self.cate_feature_size = cate_feature_size
        self.num_feature_size = num_feature_size
        self.cate_field_size = cate_field_size
        self.embedding_size = embedding_size
        self.total_size = (self.cate_field_size+self.num_feature_size) * self.embedding_size
        self.deep_each_layer_nums = deep_each_layer_nums
        self.cross_layer_num = cross_layer_num
        self.deep_drop_train_prob = deep_drop_train_prob
        self.deep_layers_activation = deep_layers_activation
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.epoches = epoches
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type

        self.verbose = verbose
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric

        self._init_graph()
    
    def _init_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(self.random_seed)
            
            self.feat_cate = tf.placeholder(tf.int32, [None, None], name='feat_cate')
            self.feat_num = tf.placeholder(tf.float32, [None, None], name='feat_num')
            self.label = tf.placeholder(tf.float32, [None, 1], 'label')
            self.deep_drop_prob = tf.placeholder(tf.float32, [None], 'deep_drop_prob')
            
            self.weights = self._init_weights()
            
            cate_embeddings = tf.nn.embedding_lookup(
                self.weights['cate_embeddings'], self.feat_cate)
            num_embeddings = tf.multiply(
                self.weights['num_embeddings'], tf.reshape(self.feat_num, [-1, self.num_feature_size, 1]))
            
            x0 = tf.concat([
                tf.reshape(cate_embeddings, [-1, self.cate_field_size*self.embedding_size]), 
                tf.reshape(num_embeddings, [-1, self.num_feature_size*self.embedding_size])], axis=1)
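            # x0 is the DCN input vector: one embedding per categorical field plus,
            # for each numerical feature, its value scaled by a learned embedding
            # vector, flattened to width
            # total_size = (cate_field_size + num_feature_size) * embedding_size.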
            
            # deep part: deep_drop_prob holds keep probabilities; tf.nn.dropout in
            # TF 1.x already rescales the kept units by 1/keep_prob, so no extra
            # rescaling is needed
            y_deep = tf.nn.dropout(x0, self.deep_drop_prob[0])
            for i in range(len(self.deep_each_layer_nums)):
                y_deep = tf.add(
                    tf.matmul(y_deep, self.weights["deep_layer_%d" % i]), self.weights["deep_bias_%d" % i])
                y_deep = self.deep_layers_activation(y_deep)
                y_deep = tf.nn.dropout(y_deep, self.deep_drop_prob[i+1])
            
            # cross part
            x0 = tf.reshape(x0, [-1, self.total_size, 1])
            x_l = x0
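            # Cross network: each layer applies the DCN recurrence
            #   x_{l+1} = x_0 * x_l^T * w_l + b_l + x_l
            # where x_0, x_l, b_l and w_l are all (total_size, 1) column vectors,
            # so each cross layer adds only 2 * total_size parameters while raising
            # the degree of the feature interactions by one.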
            
            for l in range(self.cross_layer_num):
                x_l = tf.tensordot(tf.matmul(x0, x_l, transpose_b=True),
                                   self.weights['cross_layer_%d' % l],
                                   1) + self.weights["cross_bias_%d" % l] + x_l
            cross_network_out = tf.reshape(x_l, (-1, self.total_size))
            
            # concat part
            concat_input = tf.concat([y_deep, cross_network_out], axis=1)
            logits = tf.add(tf.matmul(concat_input,self.weights['concat_weight']), 
                              self.weights['concat_bias'])
            
            if self.loss_type == "logloss":
                self.y_hat = tf.nn.sigmoid(logits)
                loss = tf.losses.log_loss(self.label, self.y_hat)
            elif self.loss_type == "mse":
                self.y_hat = logits
                loss = tf.losses.mean_squared_error(self.label, logits)
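            # Optional L1/L2 penalties are added only on the deep, cross and output
            # weight matrices; biases and embeddings are left unregularized.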
            if self.l1_reg > 0 or self.l2_reg > 0:
                loss += tf.contrib.layers.l1_l2_regularizer(
                    self.l1_reg, self.l2_reg)(self.weights["concat_weight"])
                for i in range(len(self.deep_each_layer_nums)):
                    loss += tf.contrib.layers.l1_l2_regularizer(
                        self.l1_reg, self.l2_reg)(self.weights["deep_layer_%d" % i])
                for i in range(self.cross_layer_num):
                    loss += tf.contrib.layers.l1_l2_regularizer(
                        self.l1_reg, self.l2_reg)(self.weights["cross_layer_%d" % i])
                    
            if self.optimizer_type == "adam":
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999,
                                                        epsilon=1e-8).minimize(loss)
                
            # init
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.sess.run(init)
            
            # number of params
            total_parameters = 0
            for variable in self.weights.values():
                shape = variable.get_shape()
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            if self.verbose > 0:
                print("#params: %d" % total_parameters)
            
    def _init_weights(self):
        weights = dict()
        
        #embeddings
        weights['cate_embeddings'] = tf.Variable(
            tf.random_normal([self.cate_feature_size, self.embedding_size], 0.0, 0.1), 
            name='cate_embeddings')
        weights['num_embeddings'] = tf.Variable(
            tf.random_normal([self.num_feature_size, self.embedding_size], 0.0, 0.1), 
            name='num_embeddings')
#         weights['feature_bias'] = tf.Variable(
#             tf.random_normal([self.cate_feature_size,1],0.0,1.0),name='feature_bias')
        
        #deep layers
        layer_nums = len(self.deep_each_layer_nums)
        
        glorot = np.sqrt(2.0/(self.total_size + self.deep_each_layer_nums[0]))
        weights['deep_layer_0'] = tf.Variable(
            np.random.normal(loc=0,scale=glorot,size=(self.total_size,self.deep_each_layer_nums[0])), 
            dtype=np.float32)
        weights['deep_bias_0'] = tf.Variable(
            np.random.normal(loc=0,scale=glorot,size=(1,self.deep_each_layer_nums[0])), 
            dtype=np.float32)
        
        for i in range(1,layer_nums):
            glorot = np.sqrt(2.0 / (self.deep_each_layer_nums[i - 1] + self.deep_each_layer_nums[i]))
            weights["deep_layer_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(self.deep_each_layer_nums[i - 1], self.deep_each_layer_nums[i])),
                dtype=np.float32)  # layers[i-1] * layers[i]
            weights["deep_bias_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(1, self.deep_each_layer_nums[i])),
                dtype=np.float32)  # 1 * layer[i]
        
        # cross layers
        for i in range(self.cross_layer_num):
            glorot = np.sqrt(2.0 / (self.total_size + 1))
            weights["cross_layer_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(self.total_size, 1)),
                dtype=np.float32)  # total_size * 1
            weights["cross_bias_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(self.total_size, 1)),
                dtype=np.float32)  # total_size * 1
            
        final_input_size = self.total_size + self.deep_each_layer_nums[-1]
        glorot = np.sqrt(2.0/(final_input_size + 1))
        weights['concat_weight'] = tf.Variable(
            np.random.normal(loc=0,scale=glorot,size=(final_input_size,1)),
            dtype=np.float32)
        weights['concat_bias'] = tf.Variable(
            tf.constant(0.01),
            dtype=np.float32)

        return weights
    
    def get_batch(self, Xc, Xn, y, batch_size, index):
        start = index * batch_size
        end = (index + 1) * batch_size
        end = end if end < len(y) else len(y)
        return Xc[start:end], Xn[start:end], y[start:end]
    
    def get_batch_without_label(self, Xc, Xn, batch_size, index):
        start = index * batch_size
        end = (index + 1) * batch_size
        end = end if end < len(Xc) else len(Xc)
        return Xc[start:end], Xn[start:end]

    # shuffle three arrays/lists simultaneously, keeping rows aligned
    def shuffle(self, a, b, c):
        rng_state = np.random.get_state()
        np.random.shuffle(a)
        np.random.set_state(rng_state)
        np.random.shuffle(b)
        np.random.set_state(rng_state)
        np.random.shuffle(c)

    def predict(self, Xc, Xn):
        total_batch = int(np.ceil(len(Xc)/self.batch_size))
        predict = []
        for i in range(total_batch):
            cate_batch, num_batch = self.get_batch_without_label(
                Xc, Xn, self.batch_size, i)
            feed_dict = {self.feat_cate: cate_batch,
                         self.feat_num: num_batch,
                         self.deep_drop_prob: [1.0] * len(self.deep_drop_train_prob),}
            y_hat = self.sess.run(self.y_hat, feed_dict=feed_dict)
            predict.append(y_hat)
        return np.concatenate(predict)
    
    def evaluate(self, Xc, Xn, y):
        predict = self.predict(Xc, Xn).tolist()
        auc = roc_auc_score([x[0] for x in y], [x[0] for x in predict])
        return auc


    def fit_on_batch(self, Xc, Xn, y):
        feed_dict = {self.feat_cate: Xc,
                     self.feat_num: Xn,
                     self.label: y,
                     self.deep_drop_prob: self.deep_drop_train_prob,}
        self.sess.run(self.optimizer, feed_dict=feed_dict)

    def fit(self, cate_train, num_train, y_train,
            cate_valid=None, num_valid=None, y_valid=None,
            early_stopping=False, epoches=10, verbose=1):
        
        for epoch in range(epoches):
            self.shuffle(cate_train, num_train, y_train)
            total_batch = len(y_train) // self.batch_size
            for i in range(total_batch):
                cate_batch, num_batch, y_batch = self.get_batch(
                    cate_train, num_train, y_train, self.batch_size, i)
                self.fit_on_batch(cate_batch, num_batch, y_batch)
            
            if verbose:
                tra_auc = self.evaluate(cate_train, num_train, y_train)
                if y_valid is not None:
                    val_auc = self.evaluate(cate_valid, num_valid, y_valid)
                    print("epoch:", epoch, "tra_auc:", tra_auc, "val_auc:", val_auc)
                else:
                    print("epoch:", epoch, "tra_auc:", tra_auc)