Loading

加餐-TF2.14结构化数据编程示例(回归模型、FeatureSpace) — Bonus: TF 2.14 structured-data programming example (regression model, FeatureSpace)

tf.keras.backend.clear_session()

# Alias to keep the configuration below readable.
FS = tf.keras.utils.FeatureSpace

# Integer-categorical features using the default OOV handling.
_int_cat_names = [
    'hour', 'day', 'month', 'season', 'tz_id_location', 'text_condition_hour',
    'wind_dir_hour', 'temp_c_hour_discrete', 'precip_mm_hour_discrete',
    'humidity_hour_discrete', 'uv_hour_discrete', 'maxwind_kph_hour_discrete',
    'wind_degree_hour_discrete', 'heatindex_c_hour_discrete',
]
# Continuous features passed through as floats.
_float_names = [
    'temp_c_hour', 'maxwind_kph_hour', 'wind_degree_hour', 'pressure_in_hour',
    'precip_mm_hour', 'snow_cm_hour', 'humidity_hour', 'cloud_hour',
    'heatindex_c_hour', 'dewpoint_c_hour', 'chance_of_rain_hour',
    'chance_of_snow_hour', 'vis_km_hour', 'gust_kph_hour', 'uv_hour',
    'temp_c_hour_timing', 'precip_mm_hour_timing', 'snow_cm_hour_timing',
    'humidity_hour_timing', 'cloud_hour_timing', 'heatindex_c_hour_timing',
    'uv_hour_timing',
]

_features = {name: FS.integer_categorical(name=name) for name in _int_cat_names}
_features.update({name: FS.float(name=name) for name in _float_names})
# Special cases: the gateway id is hashed into 128 buckets, and the binary
# day/night flag gets no out-of-vocabulary slot.
_features['gateway_id'] = FS.integer_hashed(128, name='gateway_id')
_features['is_day_hour'] = FS.integer_categorical(name='is_day_hour', num_oov_indices=0)

# FeatureSpace that preprocesses raw structured inputs; output_mode='dict'
# yields one encoded tensor per feature, plus one crossed feature over the
# seven discretized weather columns.
feature_space = FS(
    features=_features,
    crosses=[
        FS.cross(
            feature_names=(
                'temp_c_hour_discrete', 'precip_mm_hour_discrete', 'humidity_hour_discrete', 'uv_hour_discrete', 
                'maxwind_kph_hour_discrete', 'wind_degree_hour_discrete', 'heatindex_c_hour_discrete'
            ),
            crossing_dim=256,
        )
    ],
    output_mode='dict',
)
# Adapt the FeatureSpace on an unlabeled sample, then encode both splits.
# NOTE(review): adapt() only sees ds_train.take(1) (a single element/batch) —
# vocabularies learned from one batch may be incomplete; confirm intentional.
ds_train_with_no_labels = ds_train.take(1).map(lambda features, _: features)
feature_space.adapt(ds_train_with_no_labels)

def _encode(features, label):
    # Apply the adapted FeatureSpace to the features, keep the label as-is.
    return feature_space(features), label

preprocessed_ds_train = (
    ds_train
    .map(_encode, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)
preprocessed_ds_valid = (
    ds_valid
    .map(_encode, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)
tf.keras.backend.clear_session()

# Build the regression model as a functional graph over the FeatureSpace inputs.
# NOTE(review): the model is wired to feature_space.get_inputs() (raw symbolic
# inputs), while the datasets above were mapped through feature_space(x)
# (encoded outputs, output_mode='dict') — confirm the encoded dict is actually
# compatible with these raw input specs before training.
inputs = dict(feature_space.get_inputs())
# Embedding of the gateway id.
# NOTE(review): gateway_id is hashed into 128 buckets by the FeatureSpace, so
# input_dim=100000 looks oversized — verify the intended vocabulary size.
layer_gateway_id = tf.keras.layers.Embedding(input_dim=100000, output_dim=10, name='layer_gateway_id')(inputs.get('gateway_id'))
# Look up the FeatureSpace crosser for the seven discretized weather features
# (keyed by the auto-generated '<f1>_X_<f2>_X_...' name) and embed the crossed
# bucket id.
# NOTE(review): the cross was configured with crossing_dim=256; input_dim=1000
# is larger than needed (safe but wasteful) — confirm.
layer_cross = tf.keras.layers.Embedding(input_dim=1000, output_dim=20, name='layer_cross')(
    feature_space.crossers[
    'temp_c_hour_discrete_X_precip_mm_hour_discrete_X_humidity_hour_discrete_X_uv_hour_discrete_X_maxwind_kph_hour_discrete_X_wind_degree_hour_discrete_X_heatindex_c_hour_discrete'
    ]([
        inputs.get('temp_c_hour_discrete'), 
        inputs.get('precip_mm_hour_discrete'), 
        inputs.get('humidity_hour_discrete'),
        inputs.get('uv_hour_discrete'),
        inputs.get('maxwind_kph_hour_discrete'),
        inputs.get('wind_degree_hour_discrete'),
        inputs.get('heatindex_c_hour_discrete'),
      ])
)
# Wide branch: concatenate all non-time-series features (cast to float32) with
# the cross embedding (squeezed from (None, 1, 20) to (None, 20)).
layer_not_timing = tf.keras.layers.Concatenate(axis=1, name='layer_not_timing')([
    tf.keras.backend.cast(inputs.get(x), tf.float32) for x in [
        'hour', 'day', 'month', 'season', 'tz_id_location', 'temp_c_hour', 'is_day_hour', 'text_condition_hour', 
        'maxwind_kph_hour', 'wind_degree_hour', 'wind_dir_hour', 'pressure_in_hour', 'precip_mm_hour', 'snow_cm_hour', 
        'humidity_hour', 'cloud_hour', 'heatindex_c_hour', 'dewpoint_c_hour', 'chance_of_rain_hour', 'chance_of_snow_hour', 
        'vis_km_hour', 'gust_kph_hour', 'uv_hour'
    ]
] + [tf.squeeze(layer_cross, axis=1)])
# MLP over the wide branch.
layer_dense_1 = tf.keras.layers.Dense(256, activation='relu', name='layer_dense_1')(layer_not_timing)
layer_dense_2 = tf.keras.layers.Dense(256, activation='relu', name='layer_dense_2')(layer_dense_1)
layer_dense_3 = tf.keras.layers.Dense(256, activation='relu', name='layer_dense_3')(layer_dense_2)
# Collapse the concatenated dense_1/dense_3 activations to a single scalar per
# example (keepdims=True -> shape (None, 1)).
layer_dense_4 = tf.keras.layers.Lambda(lambda x: tf.keras.backend.sum(x, axis=1, keepdims=True), name='layer_dense_4')(
    tf.keras.layers.Concatenate(axis=1)([layer_dense_1, layer_dense_3]),
)
# Sequence branch: concatenate the seven hourly time-series inputs and reshape
# to (batch, 24 steps, 7 features).
# NOTE(review): concatenating [feat_a(24), feat_b(24), ...] along axis 1 and
# then reshaping to (-1, 24, 7) puts 7 *consecutive values of the same feature*
# into each timestep row; if the intent is one value per feature per timestep,
# a stack on the last axis would be required instead — confirm the per-feature
# input lengths and the intended layout.
layer_timing = tf.reshape(tf.keras.layers.Concatenate(axis=1, name='layer_timing')([
    inputs.get(x) for x in [
        'temp_c_hour_timing', 'precip_mm_hour_timing', 'snow_cm_hour_timing', 'humidity_hour_timing', 
        'cloud_hour_timing', 'heatindex_c_hour_timing', 'uv_hour_timing'
    ]
]), [-1, 24, 7])  # (None, 24, 7)
layer_lstm_1 = tf.keras.layers.LSTM(128, return_sequences=True, return_state=False, dropout=0.3, name='layer_lstm_1')(layer_timing)  # (None, 24, 128)
layer_lstm_2 = tf.keras.layers.LSTM(64, return_sequences=True, return_state=False, dropout=0.3, name='layer_lstm_2')(layer_lstm_1)  # (None, 24, 64)
# Attention: the gateway embedding acts as the query; the LSTM sequence is
# projected once and used as both key and value (two-input Attention call).
Wq = tf.keras.layers.Dense(32, name='Q')
Wk_v = tf.keras.layers.Dense(32, name='K_V')
q = Wq(layer_gateway_id)  # (None, 1, 32)
k_v = Wk_v(layer_lstm_2)  # (None, 24, 32)
layer_att = tf.keras.layers.Attention(name='layer_att')([q, k_v])  # (None, 1, 32)
# Pooling over the length-1 query axis reduces (None, 1, 32) to (None, 32).
layer_q_pool = tf.keras.layers.GlobalAveragePooling1D(name='layer_q_pool')(q)  # (None, 32)
layer_att_pool = tf.keras.layers.GlobalAveragePooling1D(name='layer_att_pool')(layer_att)  # (None, 32)
# Merge the wide scalar, the pooled query, and the pooled attention output,
# then regress to a single value.
layer_concat = tf.keras.layers.Concatenate(axis=1, name='layer_concat')([layer_dense_4, layer_q_pool, layer_att_pool])
layer_dense_5 = tf.keras.layers.Dense(1024, activation='relu', name='layer_dense_5')(layer_concat)
layer_dense_6 = tf.keras.layers.Dense(512, activation='relu', name='layer_dense_6')(layer_dense_5)
outputs = tf.keras.layers.Dense(1, name='output')(layer_dense_6)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
@tf.function
def printbar():
    """Print a separator bar followed by the current time as HH:MM:SS (UTC+8)."""
    now = tf.timestamp()
    seconds_today = now % (24 * 60 * 60)
    hour = tf.cast(seconds_today // 3600 + 8, tf.int32) % tf.constant(24)
    minute = tf.cast((seconds_today % 3600) // 60, tf.int32)
    second = tf.cast(tf.floor(seconds_today % 60), tf.int32)

    def two_digits(value):
        # Left-pad single-digit components with a zero.
        if tf.strings.length(tf.strings.format("{}", value)) == 1:
            return tf.strings.format("0{}", value)
        else:
            return tf.strings.format("{}", value)

    timestring = tf.strings.join(
        [two_digits(hour), two_digits(minute), two_digits(second)],
        separator=":",
    )
    tf.print("=========="*8, end="")
    tf.print(timestring)

def mape_func(y_true, y_pred):
    """Mean absolute percentage error (in percent) for one batch.

    Both arguments are cast to float32; |y_true| is clipped from below at
    1e-8 to avoid division by zero.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    denom = tf.clip_by_value(tf.abs(y_true), 1e-8, float('inf'))
    # Bug fix: the original used tf.reduce_sum, which returns the *total*
    # percentage error over the batch and therefore scales with batch size.
    # MAPE is the mean, matching tf.keras.metrics.MeanAbsolutePercentageError.
    return 100.0 * tf.reduce_mean(tf.abs((y_true - y_pred) / denom))

def mae_func(y_true, y_pred):
    """Element-wise absolute error between targets and predictions (float32)."""
    diff = tf.cast(y_true, tf.float32) - tf.cast(y_pred, tf.float32)
    return tf.abs(diff)

@tf.function
def train_step(model, features, labels, optimizer, loss_func, train_loss, train_mae, train_mape):
    """Run one optimization step and accumulate the training metrics.

    Args:
        model: the Keras model being trained.
        features / labels: one batch from the training dataset.
        optimizer: optimizer applying the gradients.
        loss_func: loss used for backpropagation.
        train_loss / train_mae / train_mape: Mean metric accumulators.
    """
    with tf.GradientTape() as tape:
        predictions = model(features, training=True)
        loss = loss_func(labels, predictions)
    # Metrics need no gradients; computing them outside the tape avoids
    # recording unnecessary operations (the original computed them inside).
    mae = mae_func(labels, predictions)
    mape = mape_func(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss.update_state(loss)
    train_mae.update_state(mae)
    train_mape.update_state(mape)

@tf.function
def valid_step(model, features, labels, loss_func, valid_loss, valid_mae, valid_mape):
    """Evaluate one validation batch and accumulate the validation metrics."""
    predictions = model(features, training=False)
    valid_loss.update_state(loss_func(labels, predictions))
    valid_mae.update_state(mae_func(labels, predictions))
    valid_mape.update_state(mape_func(labels, predictions))

tf.keras.backend.clear_session()

def train_model(model, ds_train, ds_valid, epochs, optimizer, loss_func, train_loss, 
                train_mae, train_mape, valid_loss, valid_mae, valid_mape, train_labels, valid_labels,
                patience_limit=10):
    """Custom training loop with early stopping on the validation loss.

    Args:
        model: tf.keras.Model trained via train_step/valid_step.
        ds_train / ds_valid: tf.data datasets yielding (features, labels).
        epochs: maximum number of epochs to run.
        optimizer / loss_func: optimizer and loss passed to train_step.
        train_* / valid_*: tf.keras.metrics.Mean accumulators, reset each epoch.
        train_labels / valid_labels: unused here; kept for interface
            compatibility with existing callers.
        patience_limit: epochs without a validation-loss improvement before
            stopping early (was a hard-coded 10; default preserves behavior).

    The weights with the lowest validation loss are restored both on early
    stop and when the epoch budget is exhausted.
    """
    best_metric = float('inf')
    best_weights = None
    patience = 0
    for epoch in tf.range(1, epochs + 1):
        for features, labels in tqdm(ds_train, desc="训练"):
            train_step(model, features, labels, optimizer, loss_func, train_loss, train_mae, train_mape)
        for features, labels in tqdm(ds_valid, desc="验证"):
            valid_step(model, features, labels, loss_func, valid_loss, valid_mae, valid_mape)
        logs = 'Epoch={}, Train Loss={}, Train MAE={}, Train MAPE={}, Valid Loss={}, Valid MAE={}, Valid MAPE={}'
        printbar()
        tf.print(
            tf.strings.format(logs, (epoch, train_loss.result(), train_mae.result(), train_mape.result(), 
                                     valid_loss.result(), valid_mae.result(), valid_mape.result()))
        )
        tf.print()
        if valid_loss.result() < best_metric:
            best_metric = valid_loss.result()
            best_weights = model.get_weights()
            patience = 0
        else:
            patience += 1
            if patience > patience_limit:
                break
        train_loss.reset_states()
        train_mae.reset_states()
        train_mape.reset_states()
        valid_loss.reset_states()
        valid_mae.reset_states()
        valid_mape.reset_states()
    # Bug fix: the original restored the best weights only on the early-stop
    # branch; a run that used the full epoch budget kept the *last* weights.
    if best_weights is not None:
        model.set_weights(best_weights)

tf.keras.backend.clear_session()

# Step-decay learning-rate schedule: 0.01 for the first 10 steps, then 0.005,
# 0.001, and finally 0.0005 after step 30.
boundaries = [10, 20, 30]
learning_rates = [0.01, 0.005, 0.001, 0.0005]
lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rates)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
loss_func = tf.keras.losses.Huber()
# Running means for per-epoch loss/MAE/MAPE on each split.
train_loss, train_mae, train_mape = (
    tf.keras.metrics.Mean(name=n) for n in ('train_loss', 'train_mae', 'train_mape')
)
valid_loss, valid_mae, valid_mape = (
    tf.keras.metrics.Mean(name=n) for n in ('valid_loss', 'valid_mae', 'valid_mape')
)
train_model(model, ds_train, ds_valid, 1, optimizer, loss_func, train_loss, train_mae, train_mape,
            valid_loss, valid_mae, valid_mape, df_train_['label'].values, df_valid_['label'].values)
posted @ 2024-01-30 19:42  lotuslaw  阅读(5)  评论(0编辑  收藏  举报