自己动手写RNN
说得再好,也不如实际行动。今天手写了一个 RNN,没有使用 NumPy 库,矩阵运算方法都是自己写的。由于这只是个学习用的 demo,矩阵运算那一部分写得比较丑陋,见笑了。
import com.mylearn.hw.Process as pr
class RNN(object):
    """Small recurrent network on plain Python lists, trained by BPTT.

    The forward pass computes, for every time step ``t``::

        s[t] = sigmoid(X[t] . U + s[t-1] . W)
        o[t] = sigmoid(s[t] . V)

    All matrix arithmetic is delegated to the ``pr`` helper module
    (presumably the list-of-lists matrix helpers such as ``matrixMul2D`` and
    ``posPlus2D``; their exact semantics are not defined in this class).

    Attributes:
        input_dim, hidden_dim, output_dim: Layer sizes given at construction.
        alpha: Learning rate used by ``train`` when applying weight updates.
        U: input -> hidden weights (``input_dim`` x ``hidden_dim``).
        V: hidden -> output weights (``hidden_dim`` x ``output_dim``).
        W: hidden -> hidden weights (``hidden_dim`` x ``hidden_dim``).
        error: Accumulated absolute error of the most recent ``bptt`` call.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, alpha):
        """Store the layer sizes and learning rate, then draw random weights.

        Args:
            input_dim: Number of values per input time step.
            hidden_dim: Size of the hidden state.
            output_dim: Number of values per output time step.
            alpha: Learning rate.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        # Honour the caller's learning rate; the previous code silently
        # replaced it with a hard-coded 0.1.
        self.alpha = alpha
        # Keep the U, V, W creation order so a seeded run draws the same weights.
        self.U = pr.generateRandomMatrix(self.input_dim, self.hidden_dim)
        self.V = pr.generateRandomMatrix(self.hidden_dim, self.output_dim)
        self.W = pr.generateRandomMatrix(self.hidden_dim, self.hidden_dim)
        self.error = 0

    def forward_propagation(self, X):
        """Run the network over the sequence ``X``.

        Args:
            X: List of input rows, one per time step.

        Returns:
            ``[o, s]`` where ``o[t]`` is the output row and ``s[t]`` the hidden
            state row of step ``t``. Both are empty for an empty ``X``.
        """
        T = len(X)
        s = [[0] * self.hidden_dim for _ in range(T)]
        o = [[0] * self.output_dim for _ in range(T)]
        # The state before the first step is all zeros. Tracking it in a local
        # replaces the old trick of reading it back through ``s[-1]``.
        prev_state = [0] * self.hidden_dim
        for t in range(T):
            pre_activation = pr.posPlus2D(
                pr.matrixMul2D([X[t]], self.U),
                pr.matrixMul2D([prev_state], self.W),
            )
            s[t] = pr.sigmoid2D(pre_activation)[0]
            o[t] = pr.sigmoid2D(pr.matrixMul2D([s[t]], self.V))[0]
            prev_state = s[t]
        return [o, s]

    def bptt(self, X, Y):
        """Compute weight updates for one sequence by backpropagation through time.

        Side effect: ``self.error`` is reset and then accumulates the absolute
        error of the first output component of every time step.

        Args:
            X: List of input rows, one per time step.
            Y: List of target rows, same layout as the network output.

        Returns:
            ``(u, v, w, o)``: the accumulated updates for ``U``, ``V`` and
            ``W`` (same shapes as those matrices) and the forward-pass outputs.
        """
        o, s = self.forward_propagation(X)
        errors = pr.subtraction2D(Y, o)
        u = [[0] * self.hidden_dim for _ in range(self.input_dim)]
        v = [[0] * self.output_dim for _ in range(self.hidden_dim)]
        w = [[0] * self.hidden_dim for _ in range(self.hidden_dim)]
        T = len(errors)
        self.error = 0
        # Hidden-layer delta of the step processed just before this one, i.e.
        # the later time step; zero for the last time step.
        s_deltas_pre = [[0] * self.hidden_dim]
        for t in range(T):
            step = -t - 1  # walk the sequence from the last step to the first
            e = errors[step]
            self.error = self.error + abs(e[0])
            if t == T - 1:
                # The first time step has no predecessor: its previous state is zero.
                s_pre = [[0] * self.hidden_dim]
            else:
                s_pre = [s[step - 1]]
            deltas_layer_2 = pr.posMul2D(
                [e], pr.sigmoid_output_to_derivative2D([o[step]])
            )
            deltas_layer_1 = pr.posMul2D(
                pr.posPlus2D(
                    pr.matrixMul2D(deltas_layer_2, pr.T2D(self.V)),
                    pr.matrixMul2D(s_deltas_pre, pr.T2D(self.W)),
                ),
                pr.sigmoid_output_to_derivative2D([s[step]]),
            )
            s_deltas_pre = deltas_layer_1
            v = pr.posPlus2D(v, pr.matrixMul2D(pr.T2D([s[step]]), deltas_layer_2))
            w = pr.posPlus2D(w, pr.matrixMul2D(pr.T2D(s_pre), deltas_layer_1))
            u = pr.posPlus2D(u, pr.matrixMul2D(pr.T2D([X[step]]), deltas_layer_1))
        return u, v, w, o

    def train(self, data):
        """Train on integer pairs, teaching the network to add them bit by bit.

        For every item the two numbers and their sum are converted with
        ``pr.dec2bin`` and fed to the network in reversed order, one position
        per time step. A progress report is printed every 1000 items.

        Args:
            data: Iterable of items whose elements 0 and 1 are the integers
                to add.
        """
        count = 0
        for pair in data:
            count += 1
            a_int = pair[0]
            b_int = pair[1]
            c_int = a_int + b_int
            a = pr.dec2bin(a_int)
            b = pr.dec2bin(b_int)
            c = pr.dec2bin(c_int)
            # Time step t reads position len(a) - 1 - t of every sequence, so
            # the sequences are consumed from their last element backwards.
            n_bits = len(a)
            X = [[a[n_bits - 1 - t], b[n_bits - 1 - t]] for t in range(n_bits)]
            Y = [[c[n_bits - 1 - t]] for t in range(n_bits)]
            u, v, w, o = self.bptt(X, Y)
            # errors are (target - output), so the updates are added.
            self.U = pr.posPlus2D(pr.mul(u, self.alpha), self.U)
            self.V = pr.posPlus2D(pr.mul(v, self.alpha), self.V)
            self.W = pr.posPlus2D(pr.mul(w, self.alpha), self.W)
            if count % 1000 == 0:
                out = pr.around2D(o)
                print("Iterate :", count)
                print("input: a ", a_int, '\t', a)
                print("input: b ", b_int, '\t', b)
                print("input: c ", c_int, '\t', c)
                print("predict: %d + %d = %d" % (a_int, b_int, pr.arrayToInt(out)))
                print("ERROR:", self.error)
                print('-' * 64)
# Demo run: 2 inputs per step (one bit of each operand), 16 hidden units and
# 1 output per step, trained on 10000 random pairs of integers in 0..127.
rnn = RNN(input_dim=2, hidden_dim=16, output_dim=1, alpha=0.1)
rnn.train(pr.generateData(10000, 127))
from math import exp
from copy import deepcopy
import random as ran
def generateData(size, max):
    """Return ``size`` random ``[a, b]`` pairs of integers in ``0..max``.

    Args:
        size: Number of pairs to generate; zero or a negative value gives ``[]``.
        max: Inclusive upper bound handed to ``randint``. The name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        A list of two-element lists of ints.
    """
    return [[ran.randint(0, max), ran.randint(0, max)] for _ in range(size)]
def generateRandomMatrix(line, row):
    """Return a ``line`` x ``row`` matrix of random values in ``[-1, 1)``.

    Each entry is ``2 * random() - 1`` rounded to 7 decimal places.

    Args:
        line: Number of rows of the result.
        row: Number of entries in each row (i.e. the column count).

    Returns:
        A list of ``line`` lists, each holding ``row`` floats.
    """
    return [
        [round(2 * ran.random() - 1, 7) for _ in range(row)]
        for _ in range(line)
    ]
# Digit characters for bases up to 16: '0'-'9' followed by 'A'-'F'.
base = list("0123456789ABCDEF")
def dec2bin(string_num):
    """Convert a non-negative integer to a list of binary digits.

    The most significant bit comes first and the result is left-padded with
    zeros to at least 8 digits; larger values simply produce longer lists.

    Args:
        string_num: An int, or anything ``int()`` accepts (e.g. ``"42"``).

    Returns:
        A list of ints, each 0 or 1.

    Raises:
        ValueError: If the value is negative. The previous hand-written
            division loop never terminated for negative input, because
            ``divmod(-1, 2)`` yields ``(-1, 1)`` forever.
    """
    num = int(string_num)
    if num < 0:
        raise ValueError("dec2bin() requires a non-negative integer, got %r" % (num,))
    # "08b": binary digits, zero-padded on the left to a minimum width of 8.
    return [int(digit) for digit in format(num, "08b")]
import decimal as dec
def toFloat(A):
    """Return a copy of matrix ``A`` with every entry rounded to 7 decimals.

    Despite the name, the entries of the result are ``decimal.Decimal``
    objects, not floats: each value is converted to ``Decimal`` and then
    rounded to 7 decimal places.

    Args:
        A: List of rows; each entry must be acceptable to ``Decimal()``.

    Returns:
        A new list of rows of ``Decimal`` values.
    """
    context = dec.getcontext()
    # round(d, 7) is the public spelling of d.__round__(7).
    return [[round(dec.Decimal(value, context), 7) for value in row] for row in A]
# Transpose
def T2D(A):
    """Return the transpose of the 2-D matrix ``A`` as a new list of lists.

    The column count is taken from the first row. Entries are deep-copied,
    so the result shares no mutable objects with ``A``.

    Args:
        A: List of rows. An empty list yields an empty list (this used to
            raise ``IndexError``).

    Returns:
        A matrix with ``len(A[0])`` rows and ``len(A)`` columns.
    """
    if not A:
        return []
    width = len(A[0])
    return [[deepcopy(row[j]) for row in A] for j in range(width)]
def outer1D(A, B):
    """Return the outer product of vectors ``A`` and ``B``.

    Entry ``[i][j]`` of the result is ``A[i] * B[j]``, so the result has
    ``len(A)`` rows and ``len(B)`` columns.

    The previous version could not run: it iterated over ``len(A)`` (an int)
    and allocated the result with rows and columns swapped.

    Args:
        A: Sequence of numbers.
        B: Sequence of numbers.

    Returns:
        A list of ``len(A)`` rows, each with ``len(B)`` entries.
    """
    return [[a * b for b in B] for a in A]
def sumabs2D(A):
    """Return the sum of the absolute values of all entries of matrix ``A``.

    Args:
        A: List of rows of numbers. Every entry of every row is counted, so
            the rows need not share the length of the first row.

    Returns:
        The total as a float (``0.0`` for an empty matrix).
    """
    # Accumulate left to right from 0.0 (and under a name that does not
    # shadow the builtin ``sum``) so float results match a plain running sum.
    total = 0.0
    for row in A:
        for value in row:
            total += abs(value)
    return total
def subtraction2D(A, B):
    """Return the element-wise difference ``A - B`` of two matrices.

    The shape of the result follows ``A``: one row per row of ``A``, and as
    many columns as the first row of ``A``.

    Args:
        A: List of rows of numbers (the minuend).
        B: List of rows of numbers, at least as large as ``A`` in both
            directions.

    Returns:
        A new matrix; ``[]`` when ``A`` is empty.

    Raises:
        IndexError: If ``B`` lacks an entry that ``A`` has.
    """
    if not A:
        return []
    width = len(A[0])
    return [
        [row_a[j] - B[i][j] for j in range(width)]
        for i, row_a in enumerate(A)
    ]
def matrixMul2D(A, B):
    """Return the matrix product ``A . B``.

    Entry ``[i][j]`` is the sum over ``k`` of ``A[i][k] * B[k][j]``, where
    ``k`` runs over the rows of ``B``.

    Args:
        A: List of rows; each row needs at least ``len(B)`` entries (any
            further entries are ignored).
        B: List of rows; its column count is taken from its first row.

    Returns:
        A matrix with ``len(A)`` rows and ``len(B[0])`` columns; ``[]`` when
        ``A`` is empty.

    Raises:
        IndexError: If ``B`` is empty while ``A`` is not, or if a row of
            ``A`` is shorter than ``len(B)``.
    """
    if not A:
        return []
    n_cols = len(B[0])
    product = []
    for row_a in A:
        out_row = []
        for j in range(n_cols):
            # Plain running sum starting at 0 keeps the float rounding of the
            # straightforward triple loop.
            acc = 0
            for k, row_b in enumerate(B):
                acc += row_a[k] * row_b[j]
            out_row.append(acc)
        product.append(out_row)
    return product
def posMul2D(A, B):
    """Return the element-wise (position-wise) product of two matrices.

    The shape of the result follows ``A``: one row per row of ``A``, and as
    many columns as the first row of ``A``.

    Args:
        A: List of rows of numbers.
        B: List of rows of numbers, at least as large as ``A`` in both
            directions.

    Returns:
        A new matrix; ``[]`` when ``A`` is empty.

    Raises:
        IndexError: If ``B`` lacks an entry that ``A`` has.
    """
    if not A:
        return []
    width = len(A[0])
    return [
        [row_a[j] * B[i][j] for j in range(width)]
        for i, row_a in enumerate(A)
    ]
def posPlus2D(A, B):
    """Return the element-wise (position-wise) sum of two matrices.

    The shape of the result follows ``A``: one row per row of ``A``, and as
    many columns as the first row of ``A``.

    Args:
        A: List of rows of numbers.
        B: List of rows of numbers, at least as large as ``A`` in both
            directions.

    Returns:
        A new matrix; ``[]`` when ``A`` is empty.

    Raises:
        IndexError: If ``B`` lacks an entry that ``A`` has.
    """
    if not A:
        return []
    width = len(A[0])
    return [
        [row_a[j] + B[i][j] for j in range(width)]
        for i, row_a in enumerate(A)
    ]
def sigmoid_output_to_derivative2D(A):
    """Map every entry ``a`` of matrix ``A`` to ``a * (1 - a)``.

    When ``a`` is the output of the logistic sigmoid, ``a * (1 - a)`` is the
    sigmoid's derivative at the corresponding input.

    Each row is processed over its own length, like the other per-entry
    helpers (``mul``, ``puls``, ``around2D``).

    Args:
        A: List of rows of numbers.

    Returns:
        A new matrix with the same shape as ``A``.
    """
    return [[value * (1 - value) for value in row] for row in A]
def sigmoid2D(x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-v))`` to every entry of ``x``.

    The previous version wrapped everything in ``except Exception``, printed
    the input and implicitly returned ``None``, which only moved the failure
    to the caller. Now the one expected failure is handled and anything else
    (e.g. a non-numeric entry) propagates as a normal exception.

    Args:
        x: List of rows of numbers.

    Returns:
        A new matrix of floats with the same shape as ``x``.
    """
    def _sigmoid(value):
        try:
            return 1 / (1 + exp(-value))
        except OverflowError:
            # exp(-value) is too large for a float, which happens only for
            # very negative inputs; the sigmoid is 0 to float precision there.
            return 0.0

    return [[_sigmoid(value) for value in row] for row in x]
def mul(A, x):
    """Return matrix ``A`` with every entry multiplied by the scalar ``x``.

    Args:
        A: List of rows of numbers; rows may differ in length.
        x: The factor applied to each entry.

    Returns:
        A new matrix with the same shape as ``A``.
    """
    return [[value * x for value in row] for row in A]
def puls(A, x):
    """Return matrix ``A`` with the scalar ``x`` added to every entry.

    NOTE(review): the name looks like a typo for "plus"; it is kept because
    callers may already use it.

    Args:
        A: List of rows of numbers; rows may differ in length.
        x: The value added to each entry.

    Returns:
        A new matrix with the same shape as ``A``.
    """
    return [[value + x for value in row] for row in A]
def around2D(A):
    """Threshold every entry of matrix ``A`` at 0.5.

    Entries strictly below 0.5 become 0; all other entries become 1.

    Args:
        A: List of rows of numbers; rows may differ in length.

    Returns:
        A new matrix of ints (0 or 1) with the same shape as ``A``.
    """
    return [[0 if value < 0.5 else 1 for value in row] for row in A]
def arrayToInt(A):
    """Combine the first entry of every row of ``A`` into one number.

    Row ``i`` contributes ``A[i][0] * 2 ** i``, so row 0 is the least
    significant digit.

    Args:
        A: List of rows; only the first entry of each row is read.

    Returns:
        The weighted sum (``0`` for an empty list).
    """
    total = 0
    for position, row in enumerate(A):
        total += row[0] * 2 ** position
    return total