自己动手写RNN

说得再好,也不如实际行动。今天手写了一个RNN,没有使用NumPy库,矩阵运算方法都是自己实现的。由于这只是个学习用的demo,矩阵运算那一部分写得比较丑陋,见笑了。

import com.mylearn.hw.Process as pr
class RNN(object):
    """Small recurrent network trained with backpropagation through time.

    The network has one sigmoid hidden layer fed by the current input and the
    previous hidden state, and one sigmoid output layer. All matrix
    arithmetic goes through the list-of-lists helpers of the ``pr`` module.

    Weight attributes (nested lists created by ``pr.generateRandomMatrix``):
        U: created with arguments (input_dim, hidden_dim); input -> hidden.
        V: created with arguments (hidden_dim, output_dim); hidden -> output.
        W: created with arguments (hidden_dim, hidden_dim); hidden -> hidden.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, alpha):
        """Store the layer sizes and learning rate and draw random weights.

        Args:
            input_dim: number of values in each time-step input vector.
            hidden_dim: number of hidden units.
            output_dim: number of output units.
            alpha: factor the accumulated gradients are scaled by in
                ``train`` before being added to the weights.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        # Use the caller's learning rate instead of a fixed constant.
        self.alpha = alpha
        self.U = pr.generateRandomMatrix(self.input_dim, self.hidden_dim)
        self.V = pr.generateRandomMatrix(self.hidden_dim, self.output_dim)
        self.W = pr.generateRandomMatrix(self.hidden_dim, self.hidden_dim)
        # Error of the most recent bptt() call (see bptt for its definition).
        self.error = 0

    def forward_propagation(self, X):
        """Run the network over the sequence ``X``.

        Args:
            X: list of input vectors, one per time step.

        Returns:
            ``[o, s]`` where ``o[t]`` is the output vector and ``s[t]`` the
            hidden state at step ``t``. Both are empty for an empty ``X``.
        """
        T = len(X)
        s = [[0] * self.hidden_dim for _ in range(T)]
        o = [[0] * self.output_dim for _ in range(T)]
        # The state preceding the first step is all zeros.
        prev_state = [0] * self.hidden_dim
        for t in range(T):
            pre_activation = pr.posPlus2D(
                pr.matrixMul2D([X[t]], self.U),
                pr.matrixMul2D([prev_state], self.W),
            )
            s[t] = pr.sigmoid2D(pre_activation)[0]
            o[t] = pr.sigmoid2D(pr.matrixMul2D([s[t]], self.V))[0]
            prev_state = s[t]
        return [o, s]

    def bptt(self, X, Y):
        """Accumulate weight updates for one sequence by walking it backwards.

        Side effect: ``self.error`` is reset and set to the sum over time of
        the absolute error of the first output unit.

        Args:
            X: list of input vectors, one per time step.
            Y: list of target vectors, one per time step.

        Returns:
            ``(u, v, w, o)``: the accumulated updates for ``U``, ``V`` and
            ``W`` (computed from ``Y - o``, so they are meant to be added to
            the weights) and the forward-pass outputs ``o``.
        """
        o, s = self.forward_propagation(X)
        errors = pr.subtraction2D(Y, o)
        u = [[0] * self.hidden_dim for _ in range(self.input_dim)]
        v = [[0] * self.output_dim for _ in range(self.hidden_dim)]
        w = [[0] * self.hidden_dim for _ in range(self.hidden_dim)]
        T = len(errors)
        self.error = 0
        # Hidden-layer delta of the step processed just before this one,
        # i.e. the step that follows it in time; zero for the last time step.
        s_deltas_pre = [[0] * self.hidden_dim]
        for t in range(T):
            # Index -t-1 walks the sequence from the last step to the first.
            e = errors[-t - 1]
            self.error = self.error + abs(e[0])
            if t == T - 1:
                # The first time step has no predecessor: use a zero state.
                s_pre = [[0] * self.hidden_dim]
            else:
                s_pre = [s[-t - 2]]
            deltas_layer_2 = pr.posMul2D(
                [e], pr.sigmoid_output_to_derivative2D([o[-t - 1]])
            )
            # Error reaching the hidden layer: from this step's output and
            # from the hidden layer of the following time step.
            hidden_error = pr.posPlus2D(
                pr.matrixMul2D(deltas_layer_2, pr.T2D(self.V)),
                pr.matrixMul2D(s_deltas_pre, pr.T2D(self.W)),
            )
            deltas_layer_1 = pr.posMul2D(
                hidden_error, pr.sigmoid_output_to_derivative2D([s[-t - 1]])
            )
            s_deltas_pre = deltas_layer_1
            v = pr.posPlus2D(v, pr.matrixMul2D(pr.T2D([s[-t - 1]]), deltas_layer_2))
            w = pr.posPlus2D(w, pr.matrixMul2D(pr.T2D(s_pre), deltas_layer_1))
            u = pr.posPlus2D(u, pr.matrixMul2D(pr.T2D([X[-t - 1]]), deltas_layer_1))
        return u, v, w, o

    def train(self, data):
        """Train on integer pairs, teaching the network to add them bitwise.

        For every pair ``(a, b)`` the operands and their sum are converted
        with ``pr.dec2bin``; the resulting bit lists are consumed from their
        last element to their first, one bit position per time step. After
        each pair the weights are updated with ``alpha`` times the values
        returned by ``bptt``. Every 1000th pair a progress report is printed.

        Args:
            data: iterable of two-element sequences of integers.
        """
        count = 0
        for l in data:
            count += 1
            a_int = l[0]
            b_int = l[1]
            c_int = a_int + b_int
            a = pr.dec2bin(a_int)
            b = pr.dec2bin(b_int)
            c = pr.dec2bin(c_int)
            # Align the three bit lists at their last element so a longer
            # list (e.g. a sum with a carry-out) cannot shift the positions.
            X = []
            Y = []
            for bit_a, bit_b, bit_c in zip(reversed(a), reversed(b), reversed(c)):
                X.append([bit_a, bit_b])
                Y.append([bit_c])
            u, v, w, o = self.bptt(X, Y)

            self.U = pr.posPlus2D(pr.mul(u, self.alpha), self.U)
            self.V = pr.posPlus2D(pr.mul(v, self.alpha), self.V)
            self.W = pr.posPlus2D(pr.mul(w, self.alpha), self.W)

            if count % 1000 == 0:
                out = pr.around2D(o)
                print("Iterate :", count)
                print("input: a ", a_int, '\t', a)
                print("input: b ", b_int, '\t', b)
                print("input: c ", c_int, '\t', c)
                print("predict: %d + %d = %d" % (a_int, b_int, pr.arrayToInt(out)))
                print("ERROR:", self.error)
                print('-' * 64)
                
# Demo run executed at import time: build a network with input_dim=2,
# hidden_dim=16, output_dim=1 and a learning-rate argument of 0.1, then train
# it on the pairs returned by pr.generateData(10000, 127).
# NOTE(review): presumably 10000 random pairs with operands in 0..127 --
# confirm against pr.generateData.
rnn = RNN(2 , 16 , 1,0.1)        
rnn.train(pr.generateData(10000 , 127))
from math import exp
from copy import deepcopy
import random as ran


def generateData(size, max):
    """Build ``size`` random operand pairs.

    Each pair is a two-element list whose entries are drawn independently
    with ``random.randint(0, max)``, so both 0 and ``max`` can occur.
    """
    return [[ran.randint(0, max), ran.randint(0, max)] for _ in range(size)]
def generateRandomMatrix(line, row):
    """Return a nested list with ``line`` rows of ``row`` random values.

    Every entry is ``2 * random.random() - 1`` rounded to 7 decimal places,
    i.e. a value between -1 and 1.
    """
    return [
        [round(2 * ran.random() - 1, 7) for _ in range(row)]
        for _ in range(line)
    ]

# Digit characters '0'-'9' followed by 'A'-'F'.
base = [str(x) for x in range(10)] + [ chr(x) for x in range(ord('A'),ord('A')+6)]


def dec2bin(string_num, width=8):
    """Convert a non-negative integer to a list of its binary digits.

    Args:
        string_num: the number, as an int or anything ``int()`` accepts.
        width: minimum length of the result; shorter results are padded with
            leading zeros. Longer results are returned unpadded.

    Returns:
        A list of 0/1 integers, most significant bit first.

    Raises:
        ValueError: if the number is negative (repeated halving of a
            negative number never reaches zero).
    """
    num = int(string_num)
    if num < 0:
        raise ValueError("dec2bin requires a non-negative number, got %r" % (string_num,))
    # Zero contributes no digits of its own; it is represented by padding only.
    bits = [int(digit) for digit in format(num, "b")] if num else []
    return [0] * (width - len(bits)) + bits
import decimal as dec
def toFloat(A):
    """Convert every entry of the nested list ``A`` to a rounded Decimal.

    Despite the name, the entries of the result are ``decimal.Decimal``
    objects rounded to 7 decimal places, not floats.
    """
    context = dec.getcontext()
    return [
        [round(dec.Decimal(entry, context), 7) for entry in line]
        for line in A
    ]
# Transpose
def T2D(A):
    """Return the transpose of the rectangular nested list ``A``.

    The column count is taken from the first row. Every element is passed
    through ``deepcopy``, so the result shares no elements with ``A``.
    """
    n_rows = len(A)
    n_cols = len(A[0])
    return [
        [deepcopy(A[r][c]) for r in range(n_rows)]
        for c in range(n_cols)
    ]
def outer1D(A, B):
    """Return the outer product of the vectors ``A`` and ``B``.

    Args:
        A: sequence of numbers; one result row per element.
        B: sequence of numbers; one result column per element.

    Returns:
        A nested list ``res`` with ``len(A)`` rows and ``len(B)`` columns
        where ``res[i][j] == A[i] * B[j]``.
    """
    return [[a * b for b in B] for a in A]
def sumabs2D(A):
    """Return the sum of the absolute values of all entries of ``A``.

    The column count is taken from the first row. The result is a float and
    is 0.0 for an empty ``A``.
    """
    total = 0.0
    for line in A:
        for col in range(len(A[0])):
            total += abs(line[col])
    return total
def subtraction2D(A, B):
    """Return the element-wise difference ``A - B`` as a new nested list.

    The result has the shape of ``A``, with the column count taken from the
    first row of ``A``.
    """
    return [
        [line[col] - B[idx][col] for col in range(len(A[0]))]
        for idx, line in enumerate(A)
    ]
     
def matrixMul2D(A, B):
    """Return the matrix product ``A x B`` as a new nested list.

    The result has ``len(A)`` rows and ``len(B[0])`` columns; the inner
    dimension is ``len(B)``. An empty ``A`` gives an empty result.
    """
    if not A:
        return []
    inner = len(B)
    n_cols = len(B[0])
    product = []
    for left_row in A:
        out_row = []
        for col in range(n_cols):
            # Accumulate in index order starting from integer 0.
            acc = 0
            for k in range(inner):
                acc += left_row[k] * B[k][col]
            out_row.append(acc)
        product.append(out_row)
    return product
def posMul2D(A, B):
    """Return the element-wise product of ``A`` and ``B``.

    The result has the shape of ``A``, with the column count taken from the
    first row of ``A``.
    """
    width_of = len  # column count is re-read from A[0] for every row
    return [
        [line[col] * B[idx][col] for col in range(width_of(A[0]))]
        for idx, line in enumerate(A)
    ]
def posPlus2D(A, B):
    """Return the element-wise sum of ``A`` and ``B``.

    The result has the shape of ``A``, with the column count taken from the
    first row of ``A``.
    """
    total = []
    for idx, line in enumerate(A):
        other = B[idx]
        total.append([line[col] + other[col] for col in range(len(A[0]))])
    return total
def sigmoid_output_to_derivative2D(A):
    """Map every entry ``y`` of ``A`` to ``y * (1 - y)``.

    When ``y`` is a sigmoid output this is the sigmoid's derivative at the
    corresponding input. The column count is taken from the first row.
    """
    return [
        [line[col] * (1 - line[col]) for col in range(len(A[0]))]
        for line in A
    ]
def sigmoid2D(x):
    """Apply the logistic sigmoid to every entry of the nested list ``x``.

    Args:
        x: nested list of numbers.

    Returns:
        A new nested list of the same shape holding ``1 / (1 + exp(-v))``
        for every entry ``v``.

    Raises:
        TypeError: if an entry is not a number. Errors are propagated rather
            than printed, so a failure cannot turn into a ``None`` result.
    """
    def _sigmoid(value):
        try:
            return 1 / (1 + exp(-value))
        except OverflowError:
            # exp(-value) only overflows for very negative values, where
            # the sigmoid is vanishingly small.
            return 0.0

    return [[_sigmoid(value) for value in line] for line in x]
    
def mul(A, x):
    """Return a copy of the nested list ``A`` with every entry multiplied by ``x``."""
    return [[entry * x for entry in line] for line in A]
def puls(A, x):
    """Return a copy of the nested list ``A`` with ``x`` added to every entry."""
    shifted = []
    for line in A:
        shifted.append([entry + x for entry in line])
    return shifted

def around2D(A):
    """Threshold every entry of the nested list ``A`` at 0.5.

    Entries below 0.5 become 0; all other entries become 1.
    """
    return [[0 if entry < 0.5 else 1 for entry in line] for line in A]
def arrayToInt(A):
    """Combine the first entries of the rows of ``A`` into one number.

    Row ``i`` contributes ``A[i][0] * 2**i``, so the first row is the least
    significant position.
    """
    total = 0
    for position, line in enumerate(A):
        total += line[0] * 2 ** position
    return total

posted on 2016-11-07 15:31  BruceLv  阅读(583)  评论(0)  编辑  收藏  举报

导航