tf.keras.backend.clear_session()

# Feature name groups — each string must match the dataset's dict keys exactly.
_CATEGORICAL_INT_FEATURES = (
    'hour', 'day', 'month', 'season', 'tz_id_location', 'text_condition_hour',
    'wind_dir_hour', 'temp_c_hour_discrete', 'precip_mm_hour_discrete',
    'humidity_hour_discrete', 'uv_hour_discrete', 'maxwind_kph_hour_discrete',
    'wind_degree_hour_discrete', 'heatindex_c_hour_discrete',
)
_FLOAT_FEATURES = (
    'temp_c_hour', 'maxwind_kph_hour', 'wind_degree_hour', 'pressure_in_hour',
    'precip_mm_hour', 'snow_cm_hour', 'humidity_hour', 'cloud_hour',
    'heatindex_c_hour', 'dewpoint_c_hour', 'chance_of_rain_hour',
    'chance_of_snow_hour', 'vis_km_hour', 'gust_kph_hour', 'uv_hour',
    'temp_c_hour_timing', 'precip_mm_hour_timing', 'snow_cm_hour_timing',
    'humidity_hour_timing', 'cloud_hour_timing', 'heatindex_c_hour_timing',
    'uv_hour_timing',
)
_DISCRETE_CROSS_FEATURES = (
    'temp_c_hour_discrete', 'precip_mm_hour_discrete', 'humidity_hour_discrete',
    'uv_hour_discrete', 'maxwind_kph_hour_discrete', 'wind_degree_hour_discrete',
    'heatindex_c_hour_discrete',
)

_features = {
    # Gateway ids are hashed into 128 bins rather than vocabulary-encoded.
    'gateway_id': tf.keras.utils.FeatureSpace.integer_hashed(128, name='gateway_id'),
    # Day/night flag: no out-of-vocabulary bucket is reserved for it.
    'is_day_hour': tf.keras.utils.FeatureSpace.integer_categorical(name='is_day_hour', num_oov_indices=0),
}
for _name in _CATEGORICAL_INT_FEATURES:
    _features[_name] = tf.keras.utils.FeatureSpace.integer_categorical(name=_name)
for _name in _FLOAT_FEATURES:
    _features[_name] = tf.keras.utils.FeatureSpace.float(name=_name)

feature_space = tf.keras.utils.FeatureSpace(
    features=_features,
    crosses=[
        # One crossed feature over every discretised weather column, hashed to 256 bins.
        tf.keras.utils.FeatureSpace.cross(
            feature_names=_DISCRETE_CROSS_FEATURES,
            crossing_dim=256,
        )
    ],
    output_mode='dict',
)
# Learn the preprocessing state (categorical vocabularies, float statistics).
# NOTE(review): adapt() only sees ds_train.take(1) — a single element/batch of the
# training set. Vocabularies and statistics learned from one batch may be badly
# incomplete; confirm this is an intentional smoke-test shortcut rather than a bug.
ds_train_with_no_labels = ds_train.take(1).map(lambda x, _: x)
feature_space.adapt(ds_train_with_no_labels)
# Apply the adapted FeatureSpace to both splits; labels pass through unchanged.
# NOTE(review): the train_model(...) call at the bottom of this file is passed the
# raw ds_train / ds_valid, not these preprocessed datasets — verify which pipeline
# is intended to feed the model.
preprocessed_ds_train = ds_train.map(lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE)
preprocessed_ds_train = preprocessed_ds_train.prefetch(tf.data.AUTOTUNE)
preprocessed_ds_valid = ds_valid.map(lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE)
preprocessed_ds_valid = preprocessed_ds_valid.prefetch(tf.data.AUTOTUNE)
tf.keras.backend.clear_session()
# Raw (un-preprocessed) symbolic inputs, one per FeatureSpace feature, keyed by name.
inputs = dict(feature_space.get_inputs())
# Embed the gateway id. NOTE(review): the FeatureSpace hashes gateway_id into 128
# bins, but this layer receives the RAW input and reserves 100000 embedding rows —
# confirm the raw id range actually fits input_dim=100000.
layer_gateway_id = tf.keras.layers.Embedding(input_dim=100000, output_dim=10, name='layer_gateway_id')(inputs.get('gateway_id'))
# Apply the FeatureSpace's crossing layer manually to the raw discrete inputs, then
# embed the crossed bucket id. The crosser hashes into crossing_dim=256 buckets,
# so input_dim=1000 is oversized but safe.
layer_cross = tf.keras.layers.Embedding(input_dim=1000, output_dim=20, name='layer_cross')(
    feature_space.crossers[
        'temp_c_hour_discrete_X_precip_mm_hour_discrete_X_humidity_hour_discrete_X_uv_hour_discrete_X_maxwind_kph_hour_discrete_X_wind_degree_hour_discrete_X_heatindex_c_hour_discrete'
    ]([
        inputs.get('temp_c_hour_discrete'),
        inputs.get('precip_mm_hour_discrete'),
        inputs.get('humidity_hour_discrete'),
        inputs.get('uv_hour_discrete'),
        inputs.get('maxwind_kph_hour_discrete'),
        inputs.get('wind_degree_hour_discrete'),
        inputs.get('heatindex_c_hour_discrete'),
    ])
)
# Concatenate all non-time-series features (cast to float32) plus the squeezed
# cross embedding. NOTE(review): categorical codes (hour, season, wind_dir_hour, …)
# are fed as raw numeric values here instead of one-hot/embedded — confirm intended.
layer_not_timing = tf.keras.layers.Concatenate(axis=1, name='layer_not_timing')([
    tf.keras.backend.cast(inputs.get(x), tf.float32) for x in [
        'hour', 'day', 'month', 'season', 'tz_id_location', 'temp_c_hour', 'is_day_hour', 'text_condition_hour',
        'maxwind_kph_hour', 'wind_degree_hour', 'wind_dir_hour', 'pressure_in_hour', 'precip_mm_hour', 'snow_cm_hour',
        'humidity_hour', 'cloud_hour', 'heatindex_c_hour', 'dewpoint_c_hour', 'chance_of_rain_hour', 'chance_of_snow_hour',
        'vis_km_hour', 'gust_kph_hour', 'uv_hour'
    ]
] + [tf.squeeze(layer_cross, axis=1)])
# Three-layer MLP over the static features.
layer_dense_1 = tf.keras.layers.Dense(256, activation='relu', name='layer_dense_1')(layer_not_timing)
layer_dense_2 = tf.keras.layers.Dense(256, activation='relu', name='layer_dense_2')(layer_dense_1)
layer_dense_3 = tf.keras.layers.Dense(256, activation='relu', name='layer_dense_3')(layer_dense_2)
# Skip-style summary: concatenate first and third dense outputs and collapse each
# sample to ONE scalar (sum over the feature axis, keepdims → shape (None, 1)).
layer_dense_4 = tf.keras.layers.Lambda(lambda x: tf.keras.backend.sum(x, axis=1, keepdims=True), name='layer_dense_4')(
    tf.keras.layers.Concatenate(axis=1)([layer_dense_1, layer_dense_3]),
)
# Stack the 7 hourly time-series ("timing") features into (None, 24, 7).
# NOTE(review): if each *_timing input is (None, 24), Concatenate gives a
# feature-major (None, 168) layout, and reshape [-1, 24, 7] would interleave
# features across timesteps; a stack to (None, 7, 24) + transpose may be the
# intended layout — TODO confirm the per-feature input shape.
layer_timing = tf.reshape(tf.keras.layers.Concatenate(axis=1, name='layer_timing')([
    inputs.get(x) for x in [
        'temp_c_hour_timing', 'precip_mm_hour_timing', 'snow_cm_hour_timing', 'humidity_hour_timing',
        'cloud_hour_timing', 'heatindex_c_hour_timing', 'uv_hour_timing'
    ]
]), [-1, 24, 7]) # (None, 24, 7)
# Two stacked LSTMs over the 24-step sequence.
layer_lstm_1 = tf.keras.layers.LSTM(128, return_sequences=True, return_state=False, dropout=0.3, name='layer_lstm_1')(layer_timing) # (None, 24, 128)
layer_lstm_2 = tf.keras.layers.LSTM(64, return_sequences=True, return_state=False, dropout=0.3, name='layer_lstm_2')(layer_lstm_1) # (None, 24, 64)
# Cross-attention: the gateway embedding is the query; the LSTM sequence serves as
# both key and value (Attention with a 2-element list reuses key as value).
Wq = tf.keras.layers.Dense(32, name='Q')
Wk_v = tf.keras.layers.Dense(32, name='K_V')
q = Wq(layer_gateway_id) # (None, 1, 32)
k_v = Wk_v(layer_lstm_2) # (None, 24, 32)
layer_att = tf.keras.layers.Attention(name='layer_att')([q, k_v]) # (None, 1, 32)
# Pooling over a length-1 axis just squeezes (None, 1, 32) → (None, 32).
layer_q_pool = tf.keras.layers.GlobalAveragePooling1D(name='layer_q_pool')(q) # (None, 32)
layer_att_pool = tf.keras.layers.GlobalAveragePooling1D(name='layer_att_pool')(layer_att) # # (None, 32)
# Fuse static summary scalar + query + attended context, then regress to 1 output.
layer_concat = tf.keras.layers.Concatenate(axis=1, name='layer_concat')([layer_dense_4, layer_q_pool, layer_att_pool])
layer_dense_5 = tf.keras.layers.Dense(1024, activation='relu', name='layer_dense_5')(layer_concat)
layer_dense_6 = tf.keras.layers.Dense(512, activation='relu', name='layer_dense_6')(layer_dense_5)
outputs = tf.keras.layers.Dense(1, name='output')(layer_dense_6)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
@tf.function
def printbar():
    """Print a separator bar followed by the current wall-clock time as HH:MM:SS.

    The hour is shifted by +8, i.e. the timestamp is rendered in UTC+8
    (presumably the author's local timezone — confirm if deployed elsewhere).
    """
    ts = tf.timestamp()
    # Seconds elapsed since UTC midnight.
    today_ts = ts%(24*60*60)
    # +8 converts UTC to UTC+8; the modulo wraps back into 0-23.
    hour = tf.cast(today_ts//3600+8,tf.int32)%tf.constant(24)
    minite = tf.cast((today_ts%3600)//60,tf.int32)  # sic: "minute"
    second = tf.cast(tf.floor(today_ts%60),tf.int32)
    def timeformat(m):
        # Zero-pad single-digit components. Inside @tf.function this tensor
        # comparison is converted by AutoGraph into a graph-mode conditional.
        if tf.strings.length(tf.strings.format("{}",m))==1:
            return(tf.strings.format("0{}",m))
        else:
            return(tf.strings.format("{}",m))
    timestring = tf.strings.join([timeformat(hour),timeformat(minite), timeformat(second)],separator = ":")
    # end="" keeps the timestamp on the same line as the 80-char separator.
    tf.print("=========="*8,end = "")
    tf.print(timestring)
def mape_func(y_true, y_pred):
    """Mean absolute percentage error (in %) between labels and predictions.

    Both inputs are cast to float32. |y_true| is clipped below at 1e-8 so that
    zero labels cannot cause division by zero.

    NOTE(review): callers should make sure y_true and y_pred have matching
    shapes — a (batch,) vs (batch, 1) pair would silently broadcast.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    # reduce_mean, not reduce_sum: the previous sum grew with batch size, so the
    # Mean metrics accumulating this value reported batch-size-dependent numbers
    # instead of a true mean percentage error.
    return 100.0 * tf.reduce_mean(
        tf.abs((y_true - y_pred) / tf.clip_by_value(tf.abs(y_true), 1e-8, float('inf')))
    )
def mae_func(y_true, y_pred):
    """Elementwise absolute error between labels and predictions (float32)."""
    delta = tf.cast(y_true, tf.float32) - tf.cast(y_pred, tf.float32)
    return tf.abs(delta)
@tf.function
def train_step(model, features, labels, optimizer, loss_func, train_loss, train_mae, train_mape):
    """Run one optimisation step: forward pass, gradient update, metric accumulation."""
    with tf.GradientTape() as tape:
        predictions = model(features, training=True)
        loss = loss_func(labels, predictions)
        mae = mae_func(labels, predictions)
        mape = mape_func(labels, predictions)
    # Only `loss` drives the gradients; mae/mape are tracked for reporting.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    for metric, value in ((train_loss, loss), (train_mae, mae), (train_mape, mape)):
        metric.update_state(value)
@tf.function
def valid_step(model, features, labels, loss_func, valid_loss, valid_mae, valid_mape):
    """Run one evaluation step (inference mode) and accumulate validation metrics."""
    predictions = model(features, training=False)
    valid_loss.update_state(loss_func(labels, predictions))
    valid_mae.update_state(mae_func(labels, predictions))
    valid_mape.update_state(mape_func(labels, predictions))
tf.keras.backend.clear_session()
def train_model(model, ds_train, ds_valid, epochs, optimizer, loss_func, train_loss,
                train_mae, train_mape, valid_loss, valid_mae, valid_mape, train_labels, valid_labels):
    """Custom training loop with per-epoch logging and best-weight early stopping.

    Tracks the lowest validation loss; after more than 10 consecutive epochs
    without improvement, restores the best weights and stops. The
    train_labels / valid_labels parameters are accepted for interface
    compatibility but are not used inside the loop.
    """
    lowest_valid_loss = float('inf')
    best_snapshot = None
    stale_epochs = 0
    for epoch in tf.range(1, epochs+1):
        for features, labels in tqdm(ds_train, desc="训练"):
            train_step(model, features, labels, optimizer, loss_func, train_loss, train_mae, train_mape)
        for features, labels in tqdm(ds_valid, desc="验证"):
            valid_step(model, features, labels, loss_func, valid_loss, valid_mae, valid_mape)
        logs = 'Epoch={}, Train Loss={}, Train MAE={}, Train MAPE={}, Valid Loss={}, Valid MAE={}, Valid MAPE={}'
        printbar()
        tf.print(
            tf.strings.format(logs, (epoch, train_loss.result(), train_mae.result(), train_mape.result(),
                                     valid_loss.result(), valid_mae.result(), valid_mape.result()))
        )
        tf.print()
        # Early-stopping bookkeeping: snapshot weights on improvement, otherwise
        # count stale epochs and bail out (restoring the best weights) past 10.
        current_valid_loss = valid_loss.result()
        if current_valid_loss < lowest_valid_loss:
            lowest_valid_loss = current_valid_loss
            best_snapshot = model.get_weights()
            stale_epochs = 0
        else:
            stale_epochs += 1
            if stale_epochs > 10:
                model.set_weights(best_snapshot)
                break
        # Reset all accumulators so each epoch's metrics are independent.
        for metric in (train_loss, train_mae, train_mape, valid_loss, valid_mae, valid_mape):
            metric.reset_states()
tf.keras.backend.clear_session()

# Step-indexed LR schedule: 0.01 for the first 10 optimizer steps, then 0.005,
# 0.001 and finally 0.0005 after step 30.
# NOTE(review): PiecewiseConstantDecay boundaries count optimizer STEPS, not
# epochs — confirm 10/20/30 steps is the intended decay horizon.
boundaries = [10, 20, 30]
learning_rates = [0.01, 0.005, 0.001, 0.0005]
optimizer = tf.keras.optimizers.Adam(
    learning_rate=tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rates)
)
loss_func = tf.keras.losses.Huber()

# One running-mean accumulator per reported metric.
train_loss, train_mae, train_mape, valid_loss, valid_mae, valid_mape = (
    tf.keras.metrics.Mean(name=metric_name)
    for metric_name in ('train_loss', 'train_mae', 'train_mape',
                        'valid_loss', 'valid_mae', 'valid_mape')
)

# NOTE(review): the raw ds_train / ds_valid are passed here, not the
# preprocessed_ds_* datasets built earlier — verify which pipeline is intended.
train_model(model, ds_train, ds_valid, 1, optimizer, loss_func,
            train_loss, train_mae, train_mape,
            valid_loss, valid_mae, valid_mape,
            df_train_['label'].values, df_valid_['label'].values)