先看一下其基本的组件函数,首先是determine_padding(filter_shape, output_shape="same"):
def determine_padding(filter_shape, output_shape="same"):
    """Return the zero padding to apply along the height and width axes.

    Parameters:
    -----------
    filter_shape: tuple
        A tuple (filter_height, filter_width).
    output_shape: string
        Either 'same' or 'valid'. 'valid' means no padding. 'same' pads so
        that the output has the same height and width as the input, given
        that stride=1.

    Returns:
    --------
    A pair ((pad_h1, pad_h2), (pad_w1, pad_w2)) with the padding before and
    after the height axis and before and after the width axis.

    Raises:
    -------
    ValueError
        If output_shape is neither 'same' nor 'valid'.
    """
    # No padding
    if output_shape == "valid":
        return (0, 0), (0, 0)

    # Pad so that the output shape is the same as input shape (given that stride=1)
    if output_shape == "same":
        filter_height, filter_width = filter_shape

        # Derived from:
        #   output_height = (height + pad_h - filter_height) / stride + 1
        # In this case output_height = height and stride = 1, which gives
        # pad_h = filter_height - 1. The total is split into a floor half
        # (before) and a ceil half (after) so odd totals are still covered.
        # Dividing by 2.0 keeps this a true division on every interpreter.
        pad_h1 = int(math.floor((filter_height - 1) / 2.0))
        pad_h2 = int(math.ceil((filter_height - 1) / 2.0))
        pad_w1 = int(math.floor((filter_width - 1) / 2.0))
        pad_w2 = int(math.ceil((filter_width - 1) / 2.0))
        return (pad_h1, pad_h2), (pad_w1, pad_w2)

    # Previously an unknown mode fell through and returned None, which only
    # failed later when the caller tried to unpack the result.
    raise ValueError(
        "output_shape must be 'same' or 'valid', got %r" % (output_shape,)
    )
- math.floor(x)表示返回小于或等于x的最大整数。
- math.ceil(x)表示返回大于或等于x的最小整数。
# Example: a 3x3 filter in 'same' mode needs one pixel of padding on each
# side, i.e. pad_h == (1, 1) and pad_w == (1, 1).
pad_h, pad_w = determine_padding((3, 3), output_shape="same")
然后是image_to_column(images, filter_shape, stride, output_shape='same')函数
def image_to_column(images, filter_shape, stride, output_shape='same'):
    """Rearrange image patches into columns (the "im2col" transform).

    Parameters:
    -----------
    images: array
        Input of shape (batch_size, channels, height, width).
    filter_shape: tuple
        A tuple (filter_height, filter_width).
    stride: int
        Step between neighbouring filter positions.
    output_shape: string
        Padding mode forwarded to determine_padding ('same' or 'valid').

    Returns:
    --------
    A 2D array with filter_height * filter_width * channels rows; each column
    holds the values covered by the filter at one position of one image.
    """
    f_height, f_width = filter_shape
    n_channels = images.shape[1]

    # Zero-pad only the two spatial axes; batch and channel axes are untouched.
    pad_h, pad_w = determine_padding(filter_shape, output_shape)
    padded = np.pad(images, ((0, 0), (0, 0), pad_h, pad_w), mode='constant')

    # Index arrays that select, for every filter position, the channel / row /
    # column of each value under the filter.
    k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride)

    # Fancy indexing yields (batch, rows, positions); move batch last and
    # flatten so that every (position, image) pair becomes one column.
    patches = padded[:, k, i, j]
    n_rows = f_height * f_width * n_channels
    return patches.transpose(1, 2, 0).reshape(n_rows, -1)
def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
    """Compute the index arrays used to gather filter patches from an image.

    Parameters:
    -----------
    images_shape: tuple
        (batch_size, channels, height, width) of the unpadded input.
    filter_shape: tuple
        A tuple (filter_height, filter_width).
    padding: tuple
        ((pad_h1, pad_h2), (pad_w1, pad_w2)) as returned by determine_padding.
    stride: int
        Step between neighbouring filter positions.

    Returns:
    --------
    A tuple (k, i, j) of integer arrays, with R = channels * filter_height *
    filter_width and P = out_height * out_width:
        k: shape (R, 1), channel index of each patch element.
        i: shape (R, P), row index into the padded image.
        j: shape (R, P), column index into the padded image.
    """
    # First figure out what the size of the output should be
    _batch_size, channels, height, width = images_shape
    filter_height, filter_width = filter_shape
    pad_h, pad_w = padding
    out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
    out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)

    # Row offsets inside one filter window, repeated for every channel.
    i0 = np.repeat(np.arange(filter_height), filter_width)
    i0 = np.tile(i0, channels)
    # Row of the window's top-left corner for every output position.
    i1 = stride * np.repeat(np.arange(out_height), out_width)

    # Column offsets inside one filter window, repeated for every channel.
    j0 = np.tile(np.arange(filter_width), filter_height * channels)
    # Column of the window's top-left corner for every output position.
    j1 = stride * np.tile(np.arange(out_width), out_height)

    # Broadcasting offset + corner gives the absolute row / column indices.
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    # Channel index for every patch element, as a column so that it
    # broadcasts against i and j.
    k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)

    return (k, i, j)
# Example: one 3-channel 32x32 image, 3x3 filter, 'same' padding, stride 1.
get_im2col_indices(
    (1, 3, 32, 32),
    (3, 3),
    ((1, 1), (1, 1)),
    stride=1,
)
- i0:np.repeat(np.arange(3),3):[0 ,0,0,1,1,1,2,2,2]
- i0:np.tile([0,0,0,1,1,1,2,2,2],3):[0,0,0,1,1,1,2,2,2,0,0,0,1,1,1,2,2,2,0,0,0,1,1,1,2,2,2],大小为:(27,)
- i1:1*np.repeat(np.arange(32),32):[0,0,0......,31,31,31],大小为:(1024,)
- j0:np.tile(np.arange(3),3*3):[0,1,2,0,1,2,......],大小为:(27,)
- j1:1*np.tile(np.arange(32),32):[0,1,2,3,......,0,1,2,......,29,30,31],大小为(1024,)
- i:i0.reshape(-1,1)+i1.reshape(1,-1):大小(27,1024)
- j:j0.reshape(-1,1)+j1.reshape(1,-1):大小(27,1024)
- k:np.repeat(np.arange(3),3*3).reshape(-1,1):大小(27,1)
- numpy.pad(array, pad_width, mode='constant', **kwargs):array是要被填充的数据,pad_width指定每个维度前后各填充的长度,mode用于指定填充方式,默认是'constant';当mode为'constant'时,可通过constant_values指定填充的值,默认为0。
- numpy.arange(start, stop, step, dtype = None):举例numpy.arange(3),输出[0,1,2]
- numpy.repeat(array,repeats,axis=None):举例numpy.repeat([0,1,2],3),输出:[0,0,0,1,1,1,2,2,2]
- numpy.tile(array,reps):举例numpy.tile([0,1,2],3),输出:[0,1,2,0,1,2,0,1,2]
- 具体的更复杂的用法还是得去查相关资料。这里只列举出与本代码相关的。
# Gather the patch values: k is (R, 1) and i, j are (R, P), so the result
# has shape (batch_size, R, P).
cols = images_padded[:, k, i, j]
channels = images.shape[1]
# Reshape content into column shape: move the batch axis last, then flatten
# to (filter_height * filter_width * channels, P * batch_size).
cols = cols.transpose(1, 2, 0).reshape(
    filter_height * filter_width * channels, -1
)
class Layer(object):
    """Base class defining the interface every network layer implements.

    Subclasses must override forward_pass, backward_pass and output_shape;
    the versions here raise NotImplementedError.
    """

    def set_input_shape(self, shape):
        """Store the input shape the layer expects in forward_pass."""
        self.input_shape = shape

    def layer_name(self):
        """Return the layer's class name (used in the model summary)."""
        return type(self).__name__

    def parameters(self):
        """Return the number of trainable parameters; 0 unless overridden."""
        return 0

    def forward_pass(self, X, training):
        """Propagate the signal X forward through the layer."""
        raise NotImplementedError()

    def backward_pass(self, accum_grad):
        """Propagate the accumulated gradient backwards through the layer.

        accum_grad is the gradient with respect to this layer's output. The
        method returns the gradient with respect to the previous layer's
        output. Layers with trainable weights also update them here.
        """
        raise NotImplementedError()

    def output_shape(self):
        """Return the shape of the output produced by forward_pass."""
        raise NotImplementedError()
对于子类继承该基类必须要实现的方法,如果没有实现使用raise NotImplementedError()抛出异常。
class Conv2D(Layer):
    """A 2D Convolution Layer.

    Parameters:
    -----------
    n_filters: int
        The number of filters that will convolve over the input matrix. The number of channels
        of the output shape.
    filter_shape: tuple
        A tuple (filter_height, filter_width).
    input_shape: tuple
        The shape of a single input sample, (channels, height, width), i.e.
        without the batch dimension: initialize() reads input_shape[0] as the
        channel count and output_shape() unpacks exactly three values.
        Only needs to be specified for first layer in the network.
    padding: string
        Either 'same' or 'valid'. 'same' results in padding being added so that the output height and width
        matches the input height and width. For 'valid' no padding is added.
    stride: int
        The stride length of the filters during the convolution over the input.
    """

    def __init__(self, n_filters, filter_shape, input_shape=None, padding='same', stride=1):
        self.n_filters = n_filters
        self.filter_shape = filter_shape
        self.padding = padding
        self.stride = stride
        self.input_shape = input_shape
        self.trainable = True

    def initialize(self, optimizer):
        """Create the weights and bias and give each its own optimizer copy."""
        # Initialize the weights
        filter_height, filter_width = self.filter_shape
        channels = self.input_shape[0]
        limit = 1 / math.sqrt(np.prod(self.filter_shape))
        self.W = np.random.uniform(-limit, limit, size=(self.n_filters, channels, filter_height, filter_width))
        self.w0 = np.zeros((self.n_filters, 1))
        # Weight optimizers (separate shallow copies for W and w0)
        self.W_opt = copy.copy(optimizer)
        self.w0_opt = copy.copy(optimizer)

    def parameters(self):
        """Return the number of trainable values in W plus w0."""
        return np.prod(self.W.shape) + np.prod(self.w0.shape)

    def forward_pass(self, X, training=True):
        """Convolve X of shape (batch_size, channels, height, width).

        Returns an array of shape (batch_size,) + self.output_shape().
        """
        batch_size, channels, height, width = X.shape
        self.layer_input = X
        # Turn image shape into column shape
        # (enables dot product between input and weights)
        self.X_col = image_to_column(X, self.filter_shape, stride=self.stride, output_shape=self.padding)
        # Turn weights into column shape
        self.W_col = self.W.reshape((self.n_filters, -1))
        # Calculate output
        output = self.W_col.dot(self.X_col) + self.w0
        # Reshape into (n_filters, out_height, out_width, batch_size)
        output = output.reshape(self.output_shape() + (batch_size, ))
        # Redistribute axes so that batch size comes first
        return output.transpose(3, 0, 1, 2)

    def backward_pass(self, accum_grad):
        """Update the weights (if trainable) and return the input gradient.

        accum_grad is the gradient with respect to this layer's output. The
        optimizers are expected to provide update(weights, gradient).
        """
        # Reshape accumulated gradient into column shape
        accum_grad = accum_grad.transpose(1, 2, 3, 0).reshape(self.n_filters, -1)

        if self.trainable:
            # Take dot product between column shaped accum. gradient and column shape
            # layer input to determine the gradient at the layer with respect to layer weights
            grad_w = accum_grad.dot(self.X_col.T).reshape(self.W.shape)
            # The gradient with respect to bias terms is the sum similarly to in Dense layer
            grad_w0 = np.sum(accum_grad, axis=1, keepdims=True)

            # Update the layers weights
            self.W = self.W_opt.update(self.W, grad_w)
            self.w0 = self.w0_opt.update(self.w0, grad_w0)

        # Recalculate the gradient which will be propagated back to prev. layer
        # (uses W_col as cached by the last forward_pass)
        accum_grad = self.W_col.T.dot(accum_grad)
        # Reshape from column shape to image shape
        # NOTE(review): column_to_image is not defined in the visible part of
        # this file; it must be provided elsewhere.
        accum_grad = column_to_image(accum_grad,
                                     self.layer_input.shape,
                                     self.filter_shape,
                                     stride=self.stride,
                                     output_shape=self.padding)

        return accum_grad

    def output_shape(self):
        """Return (n_filters, output_height, output_width) for one sample."""
        channels, height, width = self.input_shape
        pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
        output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride + 1
        output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride + 1
        return self.n_filters, int(output_height), int(output_width)
self.X_col的大小就是(27,1024),self.W_col的大小是(16,27),那么output = self.W_col.dot(self.X_col) + self.w0的大小就是(16,1024)
# Demo 1: 'same' padding, stride 1 -> spatial size is preserved.
image = np.random.randint(0, 255, size=(1, 3, 32, 32)).astype(np.uint8)
# Drop only the batch axis. squeeze() would also drop a channel axis of
# size 1 and hand Conv2D a 2-tuple it cannot unpack.
input_shape = image.shape[1:]
conv2d = Conv2D(16, (3, 3), input_shape=input_shape, padding='same', stride=1)
conv2d.initialize(None)
output = conv2d.forward_pass(image, training=True)
print(output.shape)  # (1, 16, 32, 32)

# Demo 2: 'valid' padding, stride 1 -> each spatial side shrinks by 2.
image = np.random.randint(0, 255, size=(1, 3, 32, 32)).astype(np.uint8)
input_shape = image.shape[1:]
conv2d = Conv2D(16, (3, 3), input_shape=input_shape, padding='valid', stride=1)
conv2d.initialize(None)
output = conv2d.forward_pass(image, training=True)
print(output.shape)  # (1, 16, 30, 30)
print(conv2d.parameters())  # 3*3*3*16 + 16 = 448

# Demo 3: 'valid' padding, stride 2 -> int((32 - 3) / 2 + 1) = 15 per side.
image = np.random.randint(0, 255, size=(1, 3, 32, 32)).astype(np.uint8)
input_shape = image.shape[1:]
conv2d = Conv2D(16, (3, 3), input_shape=input_shape, padding='valid', stride=2)
conv2d.initialize(None)
output = conv2d.forward_pass(image, training=True)
print(output.shape)  # (1, 16, 15, 15)
print(conv2d.parameters())  # 448
卷积层参数计算公式 :params=卷积核高×卷积核宽×通道数目×卷积核数目+偏置项(卷积核数目)
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步