A Concise Handbook of TensorFlow: RNN
The following example trains a character-level LSTM language model on the Nietzsche corpus and then samples new text from it at several temperatures:

```python
import numpy as np
import tensorflow as tf


# Dataset
class DataLoader():
    def __init__(self):
        path = tf.keras.utils.get_file('nietzsche.txt',
                                       origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
        with open(path, encoding='utf-8') as f:
            self.raw_text = f.read().lower()
        self.chars = sorted(list(set(self.raw_text)))  # deduplicated character vocabulary
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
        self.text = [self.char_indices[c] for c in self.raw_text]

    def get_batch(self, seq_length, batch_size):
        seq = []
        next_char = []
        for i in range(batch_size):
            index = np.random.randint(0, len(self.text) - seq_length)
            seq.append(self.text[index:index + seq_length])
            next_char.append(self.text[index + seq_length])
        return np.array(seq), np.array(next_char)  # [batch_size, seq_length], [batch_size]


# RNN model definition
class RNN(tf.keras.Model):
    def __init__(self, num_chars, batch_size, seq_length):
        super().__init__()
        self.num_chars = num_chars
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.cell = tf.keras.layers.LSTMCell(units=256)  # output dimension of the LSTMCell
        self.dense = tf.keras.layers.Dense(units=self.num_chars)

    def call(self, inputs, from_logits=False):
        inputs = tf.one_hot(inputs, depth=self.num_chars)  # [batch_size, seq_length, num_chars]
        state = self.cell.get_initial_state(batch_size=self.batch_size, dtype=tf.float32)  # initial state of the RNN
        for t in range(self.seq_length):
            # feed the current input and the previous state; get the output and the new state
            output, state = self.cell(inputs[:, t, :], state)
        logits = self.dense(output)
        if from_logits:  # from_logits controls whether the output is normalized with softmax
            return logits
        else:
            return tf.nn.softmax(logits)

    def predict(self, inputs, temperature=1.):  # sampling
        batch_size, _ = tf.shape(inputs)
        logits = self(inputs, from_logits=True)  # run the trained RNN to get the distribution over the next character
        prob = tf.nn.softmax(logits / temperature).numpy()  # softmax with a temperature parameter gives a normalized distribution
        return np.array([np.random.choice(self.num_chars, p=prob[i, :])  # draw a random sample
                         for i in range(batch_size.numpy())])            # from the predicted distribution prob


# Hyperparameters
num_batches = 1000
seq_length = 40
batch_size = 50
learning_rate = 1e-3

# Training
data_loader = DataLoader()
model = RNN(num_chars=len(data_loader.chars), batch_size=batch_size, seq_length=seq_length)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)  # Adam, an extension of SGD
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(seq_length, batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

# Text generation
X_, _ = data_loader.get_batch(seq_length, 1)
for diversity in [0.2, 0.5, 1.0, 1.2]:  # diversity (i.e. temperature), four values from small to large
    X = X_
    print("diversity %f:" % diversity)
    for t in range(400):
        y_pred = model.predict(X, diversity)  # predict the index of the next character
        print(data_loader.indices_char[y_pred[0]], end='', flush=True)  # print the predicted character
        # append the predicted character to X and drop its first character so the length stays seq_length
        X = np.concatenate([X[:, 1:], np.expand_dims(y_pred, axis=1)], axis=-1)
    print("\n")
```
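The `diversity` values in the sampling loop act as softmax temperatures: the logits are divided by the temperature before normalization, so small values sharpen the distribution and large values flatten it. A minimal standalone NumPy sketch (the logits here are made-up values for a hypothetical 3-character vocabulary, not outputs of the model above) illustrates the effect:

```python
import numpy as np

def softmax_with_temperature(logits, temperature):
    scaled = logits / temperature           # lower temperature -> larger gaps between logits
    exps = np.exp(scaled - np.max(scaled))  # subtract the max for numerical stability
    return exps / exps.sum()

logits = np.array([2.0, 1.0, 0.1])  # hypothetical logits for a 3-character vocabulary
for t in [0.2, 0.5, 1.0, 1.2]:
    print(t, softmax_with_temperature(logits, t))
```

At temperature 0.2 almost all of the probability mass falls on the most likely character, giving repetitive but safe text; at 1.2 the distribution is noticeably flatter, giving more varied but noisier output.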