import numpy as np
np.set_printoptions(suppress=True)
# 1,1,10,10; 1,1,2,2; 1,10,10; 1,10,10; 1,1,9,9

# Problem sizes for the dense (single-group) example.
N = 1                # batch size
IC = 1               # input channels
OC = 1               # output channels
FH, FW = 2, 2        # filter height / width
IH, IW = 4, 4        # input height / width
OH, OW = 3, 3        # output height / width (3 == IH - FH + 1)
RIH, RIW = IH, IW    # rin mask has the input's spatial size
ROH, ROW = OH, OW    # rout mask has the output's spatial size

# Filter tensor laid out as (OC, IC, FH, FW).  The random draw is replaced
# right away by all-ones weights, so the printed results are easy to verify
# by hand.
filter_shape = (OC, IC, FH, FW)
filter = np.random.randn(*filter_shape)
filter = np.ones(filter_shape)
print(filter.shape)
print(filter)
# Flattened view of the weights.
filter_list = filter.reshape(-1)
filter_list

(1, 1, 2, 2)
[[[[1. 1.]
   [1. 1.]]]]

array([1., 1., 1., 1.])

# Output gradient ("diff") consumed by the backward pass, shaped
# (N, OC, OH, OW).  The random draw below is immediately overwritten; it is
# kept so the random stream seen by later cells is unchanged.
diff = np.random.randn(N, OC, OH, OW)
# Deterministic ramp 1, 2, ..., N*OC*OH*OW.  The element count is derived
# from the dimensions instead of being hard-coded, so changing any size
# constant keeps the reshape valid.
diff = np.arange(1, N * OC * OH * OW + 1).reshape(N, OC, OH, OW)
print(diff.shape)
print(diff)
# Flattened view of the gradient values.
diff_list = diff.reshape(-1)
diff_list

(1, 1, 3, 3)
[[[[1 2 3]
   [4 5 6]
   [7 8 9]]]]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# Random 0/1 mask with one entry per input pixel, shaped (N, RIH, RIW).
rin = np.random.randint(0, 2, size=(N, RIH, RIW))
print(rin.shape)
rin

(1, 4, 4)

array([[[1, 1, 1, 1],
        [0, 1, 1, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1]]])

# Random 0/1 mask with one entry per output pixel, shaped (N, ROH, ROW).
rout = np.random.randint(0, 2, size=(N, ROH, ROW))
print(rout.shape)
rout

(1, 3, 3)

array([[[0, 1, 1],
        [1, 0, 0],
        [1, 0, 1]]])

# Zero-initialised accumulator for the input gradient, shaped (N, IC, IH, IW).
grad_shape = (N, IC, IH, IW)
grad = np.zeros(grad_shape)
grad.shape

(1, 1, 4, 4)

# Naive backward-data pass of the masked convolution (stride 1, no padding).
# Iterating over input positions keeps the indexing simple: every grad
# element gathers the diff elements whose filter window covers it.
# The batch and channel axes are looped over instead of being pinned to
# index 0, so the cell stays correct when N, OC or IC are larger than 1.
# np.ndindex walks the index space in row-major order (last axis fastest),
# i.e. in the same order as the equivalent nest of for-loops.
for n, oc, ic, i, j, p, q in np.ndindex(N, OC, IC, IH, IW, FH, FW):
    # Top-left corner of the window that places filter tap (p, q) on input
    # (i, j); this is also the output position that window produces.
    topidx = i - p
    leftidx = j - q
    bottomidx = topidx + FH - 1
    rightidx = leftidx + FW - 1
    # Inputs near the border are not touched by every filter tap: skip
    # windows that would stick out of the input.
    if topidx < 0 or leftidx < 0 or bottomidx >= IH or rightidx >= IW:
        continue
    # When the input and output mask values differ, the product is masked to
    # zero, so it contributes nothing to the gradient.
    if rin[n, i, j] != rout[n, topidx, leftidx]:
        continue
    grad[n, ic, i, j] += filter[oc, ic, p, q] * diff[n, oc, topidx, leftidx]
    print("grad[{}][{}] accumlate filter[{}][{}] and diff[{}][{}]".format(i,j,p,q,topidx,leftidx))

grad[0][1] accumlate filter[0][0] and diff[0][1]
grad[0][2] accumlate filter[0][0] and diff[0][2]
grad[0][2] accumlate filter[0][1] and diff[0][1]
grad[0][3] accumlate filter[0][1] and diff[0][2]
grad[1][0] accumlate filter[1][0] and diff[0][0]
grad[1][1] accumlate filter[0][1] and diff[1][0]
grad[1][1] accumlate filter[1][0] and diff[0][1]
grad[1][2] accumlate filter[1][0] and diff[0][2]
grad[1][2] accumlate filter[1][1] and diff[0][1]
grad[1][3] accumlate filter[1][1] and diff[0][2]
grad[2][1] accumlate filter[0][0] and diff[2][1]
grad[2][1] accumlate filter[1][0] and diff[1][1]
grad[2][2] accumlate filter[0][0] and diff[2][2]
grad[2][3] accumlate filter[0][1] and diff[2][2]
grad[3][1] accumlate filter[1][1] and diff[2][0]
grad[3][2] accumlate filter[1][0] and diff[2][2]
grad[3][3] accumlate filter[1][1] and diff[2][2]

grad

array([[[[ 0.,  2.,  5.,  3.],
         [ 1.,  6.,  5.,  3.],
         [ 0., 13.,  9.,  9.],
         [ 0.,  7.,  9.,  9.]]]])

grad.reshape(-1)

array([ 0.,  2.,  5.,  3.,  1.,  6.,  5.,  3.,  0., 13.,  9.,  9.,  0.,
        7.,  9.,  9.])

import numpy as np
np.set_printoptions(suppress=True)
# 1,2,4,4; 2,1,1,2,2; 1,2,3,3; 1,4,4; 1,3,3;

# Problem sizes for the grouped example.
N = 1                # batch size
IC = 1
OC = 1
FH, FW = 2, 2        # filter height / width
IH, IW = 2, 2        # input height / width
OH, OW = 1, 1        # output height / width (1 == IH - FH + 1)
RIH, RIW = IH, IW    # rin mask has the input's spatial size
ROH, ROW = OH, OW    # rout mask has the output's spatial size
GROUP = 2            # number of groups
OCPG = 1             # output channels per group
ICPG = 1             # input channels per group

# One FH x FW kernel per group.
# NOTE(review): the leading axis is sized by N although the intended layout
# is noted as (G, OCPG, ICPG, FH, FW); the loop cell reads filter[0, g, p, q],
# so this relies on N == 1 -- confirm before changing N.
filter_shape = (N, GROUP, FH, FW)
filter = np.random.randn(*filter_shape)
# Replace the random draw with all-ones weights so results are easy to check.
filter = np.ones(filter_shape)
print(filter.shape)
print(filter)
# Flattened view of the weights.
filter_list = filter.reshape(-1)
filter_list

(1, 2, 2, 2)
[[[[1. 1.]
   [1. 1.]]

  [[1. 1.]
   [1. 1.]]]]

array([1., 1., 1., 1., 1., 1., 1., 1.])

# Output gradient ("diff") shaped (N, GROUP*OCPG, OH, OW).  The random draw
# is immediately replaced by a deterministic ramp 1, 2, ..., <element count>.
out_channels = GROUP * OCPG
diff = np.random.randn(N, out_channels, OH, OW)
elem_count = N * out_channels * OH * OW
diff = np.arange(1, elem_count + 1).reshape(N, out_channels, OH, OW)
print(diff.shape)
print(diff)
# Flattened view of the gradient values.
diff_list = diff.reshape(-1)
diff_list

(1, 2, 1, 1)
[[[[1]]

  [[2]]]]

array([1, 2])

# Random 0/1 mask with one entry per input pixel, shaped (N, RIH, RIW).
rin = np.random.randint(0, 2, size=(N, RIH, RIW))
print(rin.shape)
rin

(1, 2, 2)

array([[[1, 1],
        [0, 1]]])

# Random 0/1 mask with one entry per output pixel, shaped (N, ROH, ROW).
rout = np.random.randint(0, 2, size=(N, ROH, ROW))
print(rout.shape)
rout

(1, 1, 1)

array([[[1]]])

# Zero-initialised accumulator for the input gradient, one channel per
# group * ICPG, shaped (N, GROUP*ICPG, IH, IW).
grad_shape = (N, GROUP * ICPG, IH, IW)
grad = np.zeros(grad_shape)
grad.shape

(1, 2, 2, 2)

# Grouped variant of the naive masked backward-data pass (stride 1, no
# padding).  Walking over input positions keeps the indexing simple: every
# grad element gathers the diff elements whose filter window covers it.
# np.ndindex visits (g, i, j, p, q) in row-major order, i.e. in the same
# order as five nested for-loops with q varying fastest.
# Only batch index 0 is processed, and group g is used directly as the
# channel index of grad, filter and diff.
for g, i, j, p, q in np.ndindex(GROUP, IH, IW, FH, FW):
    # Top-left corner of the window that places filter tap (p, q) on input
    # (i, j); this is also the output position that window produces.
    out_row = i - p
    out_col = j - q
    last_row = out_row + FH - 1
    last_col = out_col + FW - 1
    # Inputs near the border are not touched by every filter tap: skip
    # windows that would stick out of the input.
    if out_row < 0 or out_col < 0 or last_row >= IH or last_col >= IW:
        continue
    # Differing mask values mean the product is masked to zero, so there is
    # nothing to accumulate.
    if rin[0, i, j] != rout[0, out_row, out_col]:
        continue
    grad[0, g, i, j] += filter[0, g, p, q] * diff[0, g, out_row, out_col]
    print("in group: {}".format(g))
    print("grad[{}][{}] accumlate filter[{}][{}] and diff[{}][{}]".format(i,j,p,q,out_row,out_col))

in group: 0
grad[0][0] accumlate filter[0][0] and diff[0][0]
in group: 0
grad[0][1] accumlate filter[0][1] and diff[0][0]
in group: 0
grad[1][1] accumlate filter[1][1] and diff[0][0]
in group: 1
grad[0][0] accumlate filter[0][0] and diff[0][0]
in group: 1
grad[0][1] accumlate filter[0][1] and diff[0][0]
in group: 1
grad[1][1] accumlate filter[1][1] and diff[0][0]

grad

array([[[[1., 1.],
         [0., 1.]],

        [[2., 2.],
         [0., 2.]]]])

grad.reshape(-1)

array([1., 1., 0., 1., 2., 2., 0., 2.])

0x01

computer arch/parallel programming/

Naive implementation of convolution backward data in Python