双塔模型搭建与训练demo

1.模型构建

def get_model():
    """Build the two-tower DNN model with the Keras functional API.

    The user tower embeds ``user_id``, ``gender``, ``age`` and ``occupation``;
    the movie tower embeds ``movie_id`` and ``genre``. Each tower is reduced
    by two dense layers to an 8-dimensional vector (layers named
    ``user_embedding`` and ``movie_embedding``). The two vectors are combined
    with a dot product and the result is fed to a one-unit sigmoid layer.

    The vocabulary sizes ``num_users``, ``num_genders``, ``num_ages``,
    ``num_occupations``, ``num_movies`` and ``num_genres`` are read from
    module scope.

    Returns:
        A ``keras.models.Model`` whose inputs are, in order,
        ``[user_id, gender, age, occupation, movie_id, genre]`` and whose
        single output is one sigmoid score per example, shape ``(batch, 1)``.
    """

    # Inputs: every feature arrives as a single value per example.
    user_id = layers.Input(shape=(1,), name="user_id")
    gender = layers.Input(shape=(1,), name="gender")
    age = layers.Input(shape=(1,), name="age")
    occupation = layers.Input(shape=(1,), name="occupation")
    movie_id = layers.Input(shape=(1,), name="movie_id")
    genre = layers.Input(shape=(1,), name="genre")

    # User tower. Each Embedding turns a (batch, 1) input into
    # (batch, 1, dim); the pieces are joined along the last axis.
    user_vector = layers.concatenate([
        layers.Embedding(num_users, 100)(user_id),
        layers.Embedding(num_genders, 2)(gender),
        layers.Embedding(num_ages, 2)(age),
        layers.Embedding(num_occupations, 2)(occupation),
    ])
    user_vector = layers.Dense(32, activation='relu')(user_vector)
    user_vector = layers.Dense(8, activation='relu',
                               name="user_embedding", kernel_regularizer='l2')(user_vector)

    # Movie tower, built the same way as the user tower.
    movie_vector = layers.concatenate([
        layers.Embedding(num_movies, 100)(movie_id),
        layers.Embedding(num_genres, 2)(genre),
    ])
    movie_vector = layers.Dense(32, activation='relu')(movie_vector)
    movie_vector = layers.Dense(8, activation='relu',
                                name="movie_embedding", kernel_regularizer='l2')(movie_vector)

    # Dot product of the user and movie embeddings. Both tensors are
    # (batch, 1, 8), so the sum must run over the LAST axis; summing over
    # axis 1 would only collapse the length-1 axis and leave the eight
    # element-wise products un-reduced. The result is (batch, 1).
    dot_user_movie = tf.reduce_sum(user_vector * movie_vector, axis=-1)

    # Learned scale and bias on the dot product, squashed to (0, 1).
    output = layers.Dense(1, activation='sigmoid')(dot_user_movie)

    return keras.models.Model(
        inputs=[user_id, gender, age, occupation, movie_id, genre],
        outputs=[output],
    )

2.模型配置

# Instantiate the two-tower network, then attach the training configuration:
# RMSprop optimizer with a mean-squared-error loss.
model = get_model()
model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=tf.keras.losses.MeanSquaredError(),
)

3.模型fit

# Feature columns of X, listed in the same order as the `inputs` list that
# get_model() passes to the Model constructor.
fit_x_train = [
    X[column]
    for column in (
        "UserID_idx",
        "Gender_idx",
        "Age_idx",
        "Occupation_idx",
        "MovieID_idx",
        "Genres_idx",
    )
]

from datetime import datetime

# Every run writes its TensorBoard logs to its own timestamped directory.
TIMESTAMP = datetime.now().strftime("%Y-%m-%dT%H-%M-%S/")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir="./logs/logs_" + TIMESTAMP
)

# Train for 5 epochs with mini-batches of 32; the returned History object is
# kept in `history`.
history = model.fit(
    fit_x_train,
    y,
    batch_size=32,
    epochs=5,
    verbose=1,
    callbacks=[tensorboard_callback],
)

4.模型预测

# Shuffle the whole frame, keep only the six model feature columns, and take
# the first ten rows as a small prediction batch.
inputs = (
    df.sample(frac=1.0)[
        [
            "UserID_idx",
            "Gender_idx",
            "Age_idx",
            "Occupation_idx",
            "MovieID_idx",
            "Genres_idx",
        ]
    ]
    .head(10)
)

# Compute scores for (user ID, recalled movie ID list).
# `inputs` holds exactly the six feature columns in model-input order, so
# iterating over its columns yields the per-feature inputs in that order.
model.predict([inputs[column] for column in inputs.columns])

5.模型保存

# Write the trained model to an HDF5 file, then read it back into a separate
# object from that file.
model.save(filepath="./tensorflow_two_tower.h5")
new_model = tf.keras.models.load_model(filepath="./tensorflow_two_tower.h5")

6.保存模型的embedding用于召回

 

posted @ 2022-01-08 16:05  今夜无风  阅读(739)  评论(0)  编辑  收藏  举报