# A single one-channel 6x6 image; the nested brackets give it four dimensions
single = np.array(
    [[[[5, 0, 8, 7, 8, 1],
       [1, 9, 5, 0, 7, 7],
       [6, 0, 2, 4, 6, 6],
       [9, 7, 6, 6, 8, 4],
       [8, 3, 8, 5, 1, 3],
       [7, 2, 7, 0, 1, 0]]]]
)
single.shape  # (1, 1, 6, 6)

# 3x3 identity filter: only the center element is one, so multiplying a
# region by it and summing returns the region's center value
identity = np.array(
    [[[[0, 0, 0],
       [0, 1, 0],
       [0, 0, 0]]]]
)
identity.shape  # (1, 1, 3, 3)


region = single[:, :, 0:3, 0:3]
filtered_region = region * identity
total = filtered_region.sum()
total  # np.int64(9)


new_region = single[:, :, 0:3, (0 + 1):(3 + 1)]


new_filtered_region = new_region * identity
new_total = new_filtered_region.sum()
new_total  # np.int64(5)


last_horizontal_region = single[:, :, 0:3, (0 + 4):(3 + 4)]

The selected region does not match the shape of the filter anymore. So, if we try to perform the element-wise multiplication, it fails:


image = torch.as_tensor(single).float()
kernel_identity = torch.as_tensor(identity).float()


convolved = F.conv2d(image, kernel_identity, stride=1)


tensor([[[[9., 5., 0., 7.],
          [0., 2., 4., 6.],
          [7., 6., 6., 8.],
          [3., 8., 5., 1.]]]])


conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1)


tensor([[[[-0.2127,  1.4696,  0.3466, -0.1149],
          [-1.8007, -0.3101, -3.0307, -2.1273],
          [-0.8260, -2.0202, -0.9840,  1.2278],
          [ 0.6377,  2.0343,  2.6733,  3.2409]]]],


conv_multiple = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=3, stride=1)


Parameter containing:
tensor([[[[-0.0617,  0.2370,  0.3133],
          [ 0.1026, -0.3056, -0.2733],
          [-0.1523, -0.1558,  0.2972]]],

        [[[-0.1122,  0.2076, -0.0521],
          [-0.3075, -0.2325, -0.2017],
          [-0.2267,  0.0176,  0.1970]]]], requires_grad=True)


with torch.no_grad():
    conv.weight[0] = kernel_identity
    conv.bias[0] = 0



tensor([[[[9., 5., 0., 7.],
          [0., 2., 4., 6.],
          [7., 6., 6., 8.],
          [3., 8., 5., 1.]]]], grad_fn=<ConvolutionBackward0>)


Once again, we get the familiar result.

convolved_stride2 = F.conv2d(image, kernel_identity, stride=2)

# tensor([[[[9., 0.],
#           [7., 6.]]]])


constant_padder = nn.ConstantPad2d(padding=1, value=0)


constant_padder = nn.ConstantPad2d(padding=(1, 1, 1, 1), value=0)

 Same result:

tensor([[[[0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 5., 0., 8., 7., 8., 1., 0.],
          [0., 1., 9., 5., 0., 7., 7., 0.],
          [0., 6., 0., 2., 4., 6., 6., 0.],
          [0., 9., 7., 6., 6., 8., 4., 0.],
          [0., 8., 3., 8., 5., 1., 3., 0.],
          [0., 7., 2., 7., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0.]]]])


padded = F.pad(image, pad=(1, 1, 1, 1), value=0)


padded = F.pad(image, pad=(1, 1, 1, 1), mode='constant', value=0)

 Same result:

tensor([[[[0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 5., 0., 8., 7., 8., 1., 0.],
          [0., 1., 9., 5., 0., 7., 7., 0.],
          [0., 6., 0., 2., 4., 6., 6., 0.],
          [0., 9., 7., 6., 6., 8., 4., 0.],
          [0., 8., 3., 8., 5., 1., 3., 0.],
          [0., 7., 2., 7., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0.]]]])

replication_padder = nn.ReplicationPad2d(padding=1)


padded_replicate = F.pad(image, pad=(1, 1, 1, 1), mode='replicate')

Same result:

tensor([[[[5., 5., 0., 8., 7., 8., 1., 1.],
          [5., 5., 0., 8., 7., 8., 1., 1.],
          [1., 1., 9., 5., 0., 7., 7., 7.],
          [6., 6., 0., 2., 4., 6., 6., 6.],
          [9., 9., 7., 6., 6., 8., 4., 4.],
          [8., 8., 3., 8., 5., 1., 3., 3.],
          [7., 7., 2., 7., 0., 1., 0., 0.],
          [7., 7., 2., 7., 0., 1., 0., 0.]]]])


reflection_padder = nn.ReflectionPad2d(padding=1)


padded_reflect = F.pad(image, pad=(1, 1, 1, 1), mode='reflect')

Same result:

tensor([[[[9., 1., 9., 5., 0., 7., 7., 7.],
          [0., 5., 0., 8., 7., 8., 1., 8.],
          [9., 1., 9., 5., 0., 7., 7., 7.],
          [0., 6., 0., 2., 4., 6., 6., 6.],
          [7., 9., 7., 6., 6., 8., 4., 8.],
          [3., 8., 3., 8., 5., 1., 3., 1.],
          [2., 7., 2., 7., 0., 1., 0., 1.],
          [3., 8., 3., 8., 5., 1., 3., 1.]]]])

In PyTorch, you can use the functional form F.pad() with mode="circular", or use the module version nn.CircularPad2d:

circular_padder = nn.CircularPad2d(padding=1)


padded_circular = F.pad(image, pad=(1, 1, 1, 1), mode='circular')

Same result:

tensor([[[[0., 7., 2., 7., 0., 1., 0., 7.],
          [1., 5., 0., 8., 7., 8., 1., 5.],
          [7., 1., 9., 5., 0., 7., 7., 1.],
          [6., 6., 0., 2., 4., 6., 6., 6.],
          [4., 9., 7., 6., 6., 8., 4., 9.],
          [3., 8., 3., 8., 5., 1., 3., 8.],
          [0., 7., 2., 7., 0., 1., 0., 7.],
          [1., 5., 0., 8., 7., 8., 1., 5.]]]])


# 3x3 filter whose weights sum to zero: -4 at the center, 1 on the four
# direct neighbors, 0 on the corners
edge = np.array(
    [[[[0, 1, 0],
       [1, -4, 1],
       [0, 1, 0]]]]
)

# Float tensor version of the filter, ready to be used by F.conv2d
kernel_edge = torch.as_tensor(edge).float()
kernel_edge.shape  # torch.Size([1, 1, 3, 3])


Let’s apply this filter to our image so we can use the resulting image in our next operation:

padded = F.pad(image, pad=(1, 1, 1, 1), mode='constant', value=0)
conv_padded = F.conv2d(padded, kernel_edge, stride=1)


tensor([[[[-19.,  22., -20., -12., -17.,  11.],
          [ 16., -30.,  -1.,  23.,  -7., -14.],
          [-14.,  24.,   7.,  -2.,   1.,  -7.],
          [-15., -10.,  -1.,  -1., -15.,   1.],
          [-13.,  13., -11.,  -5.,  13.,  -7.],
          [-18.,   9., -18.,  13.,  -3.,   4.]]]])


pooled = F.max_pool2d(conv_padded, kernel_size=2)


maxpooler = nn.MaxPool2d(kernel_size=2)
pooled = maxpooler(conv_padded)

Same result:

tensor([[[[22., 23., 11.],
          [24.,  7.,  1.],
          [13., 13., 13.]]]])

And then let’s use the module version to illustrate the large four-by-four pooling:

maxpooler4 = nn.MaxPool2d(kernel_size=4)
pooled4 = maxpooler4(conv_padded)

# tensor([[[[24.]]]])


F.max_pool2d(conv_padded, kernel_size=3, stride=1)


tensor([[[[24., 24., 23., 23.],
          [24., 24., 23., 23.],
          [24., 24., 13., 13.],
          [13., 13., 13., 13.]]]])


flatenned = nn.Flatten()(pooled)

# tensor([[22., 23., 11., 24.,  7.,  1., 13., 13., 13.]])

It has no functional version, but there is no need for one since we can accomplish the same thing using view():

pooled.view(1, -1)

# tensor([[22., 23., 11., 24.,  7.,  1., 13., 13., 13.]])


Adapting LeNet-5 to today’s standards, it could be implemented like this:

lenet = nn.Sequential()

# Featurizer
# Block 1: 1@28x28 -> 6@28x28 -> 6@14x14
lenet.add_module('C1', nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2))
lenet.add_module('func1', nn.ReLU())
lenet.add_module('S2', nn.MaxPool2d(kernel_size=2))
# Block 2: 6@14x14 -> 16@10x10 -> 16@5x5
lenet.add_module('C3', nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5))
lenet.add_module('func2', nn.ReLU())
lenet.add_module('S4', nn.MaxPool2d(kernel_size=2))
# Block 3: 16@5x5 -> 120@1x1
# in_channels has to match the 16 channels produced by C3
lenet.add_module('C5', nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5))
# Every module needs a unique name: registering 'func2' a second time would
# replace the Block 2 entry instead of adding an activation after C5
lenet.add_module('func3', nn.ReLU())
# Flattening
lenet.add_module('flatten', nn.Flatten())

# Classification
# Hidden Layer
lenet.add_module('F6', nn.Linear(in_features=120, out_features=84))
lenet.add_module('func4', nn.ReLU())
# Output Layer
lenet.add_module('OUTPUT', nn.Linear(in_features=84, out_features=10))


images, labels = generate_dataset(img_size=10, n_images=1000, binary=False, seed=17)


# Build tensors from numpy arrays BEFORE split
# Modify the scale of pixel values from [0, 255] to [0, 1]
x_tensor = torch.as_tensor(images / 255).float()
y_tensor = torch.as_tensor(labels).long()

# Use index_splitter to generate indices for training and validation sets
train_idx, val_idx = index_splitter(len(x_tensor), [80, 20])

# Use indices to perform the split
x_train_tensor = x_tensor[train_idx]
y_train_tensor = y_tensor[train_idx]
x_val_tensor = x_tensor[val_idx]
y_val_tensor = y_tensor[val_idx]

# We're not doing any data augmentation now
train_composer = Compose([Normalize(mean=(.5,), std=(.5,))])
val_composer = Compose([Normalize(mean=(.5,), std=(.5,))])

# Use custom dataset to apply composed transforms to each set
train_dataset = TransformedTensorDataset(x_train_tensor, y_train_tensor, transform=train_composer)
val_dataset = TransformedTensorDataset(x_val_tensor, y_val_tensor, transform=val_composer)

# Build a weighted random sampler to handle imbalanced classes
sampler = make_balanced_sampler(y_train_tensor)

# Use sampler in the training set to get a balanced data loader
train_loader = DataLoader(dataset=train_dataset, batch_size=16, sampler=sampler)
val_loader = DataLoader(dataset=val_dataset, batch_size=16)

Before defining a model to classify our images, we need to discuss something else: the loss function.

logits = torch.tensor([ 1.3863,  0.0000, -0.6931])

We exponentiate the logits to get the corresponding odds ratios:

odds_ratios = torch.exp(logits)

# tensor([4.0000, 1.0000, 0.5000])


softmaxed = odds_ratios / odds_ratios.sum()

# tensor([0.7273, 0.1818, 0.0909])


nn.Softmax(dim=-1)(logits), F.softmax(logits, dim=-1)

# (tensor([0.7273, 0.1818, 0.0909]), tensor([0.7273, 0.1818, 0.0909]))



logits = torch.tensor([ 1.3863,  0.0000, -0.6931])
log_probs = F.log_softmax(logits, dim=-1)

# tensor([-0.3185, -1.7048, -2.3979])


label = torch.tensor([2])
F.nll_loss(log_probs.view(-1, 3), label)

# tensor(2.3979)

It is the negative of the log probability corresponding to the class index (two) of the true label.

dummy_logits = torch.randn((5, 3))
dummy_labels = torch.tensor([0, 0, 1, 2, 1])
dummy_log_probs = F.log_softmax(dummy_logits, dim=-1)


tensor([[-1.5229, -0.3146, -2.9600],
        [-1.7934, -1.0044, -0.7607],
        [-1.2513, -1.0136, -1.0471],
        [-2.6799, -0.2219, -2.0367],
        [-1.0728, -1.9098, -0.6737]])


relevant_log_probs = torch.tensor([-1.5229, -1.7934, -1.0136, -2.0367, -1.9098])

# tensor(1.6553)


loss_fn = nn.NLLLoss()
loss_fn(dummy_log_probs, dummy_labels)

# tensor(1.6553)


loss_fn = nn.NLLLoss(weight=torch.tensor([1., 1., 2.]))
loss_fn(dummy_log_probs, dummy_labels)

# tensor(1.7188)


loss_fn = nn.NLLLoss(ignore_index=2)
loss_fn(dummy_log_probs, dummy_labels)

# tensor(1.5599)



dummy_logits = torch.randn((5, 3))
dummy_labels = torch.tensor([0, 0, 1, 2, 1])

loss_fn = nn.CrossEntropyLoss()
loss_fn(dummy_logits, dummy_labels)

# tensor(1.6553)

No logsoftmax whatsoever, but the same resulting loss, as expected.


model_cnn1 = nn.Sequential()

# Featurizer
# Block 1: 1@10x10 -> n_channels@8x8 -> n_channels@4x4
n_channels = 1
model_cnn1.add_module('conv1', nn.Conv2d(in_channels=1, out_channels=n_channels, kernel_size=3))
model_cnn1.add_module('relu1', nn.ReLU())
model_cnn1.add_module('maxp1', nn.MaxPool2d(kernel_size=2))
# Flattening: n_channels * 4 * 4
model_cnn1.add_module('flatten', nn.Flatten())

# Classification
# Hidden Layer
model_cnn1.add_module('fc1', nn.Linear(in_features=n_channels*4*4, out_features=10))
model_cnn1.add_module('relu2', nn.ReLU())
# Output Layer: one logit per class
model_cnn1.add_module('fc2', nn.Linear(in_features=10, out_features=3))




lr = .1
ce_loss_fn = nn.CrossEntropyLoss(reduction='mean')
optimizer_cnn1 = optim.SGD(model_cnn1.parameters(), lr=lr)


sbs_cnn1 = StepByStep(model_cnn1, ce_loss_fn, optimizer_cnn1)
sbs_cnn1.set_loaders(train_loader, val_loader)



def _visualize_tensors(axs, x, y=None, yhat=None, layer_name='', title=None):
    """Draw the leading entries of ``x`` as grayscale images, one per axis.

    Only the first ``len(axs)`` entries of ``x`` are drawn, and all of them
    share one grayscale range taken from their overall minimum and maximum.

    Args:
        axs: sequence of axes to draw on; axis ``i`` receives entry ``i``.
        x: array-like of images / tensors to plot.
        y: optional labels, shown below each image as ``Label: ...``.
        yhat: optional predictions, shown below each image as
            ``Predicted: ...``.
        layer_name: text used in the y-label, followed by the entry's shape.
        title: optional title prefix; each axis gets ``"<title> #<index>"``.
    """
    count = len(axs)
    shown = x[:count]
    # A common scale keeps the gray levels comparable across images
    lowest, highest = np.min(shown), np.max(shown)

    for idx, tensor in enumerate(shown):
        axis = axs[idx]

        if title is not None:
            axis.set_title(f'{title} #{idx}', fontsize=12)

        # The y-label reports the (at least 2D) shape of the entry
        dims = np.atleast_2d(tensor).shape
        axis.set_ylabel(f'{layer_name}\n{dims[0]}x{dims[1]}', rotation=0, fontsize=12, labelpad=20)

        # The x-label is only set if there is a label and/or a prediction
        caption = ''
        if y is not None:
            caption += f'\nLabel: {y[idx]}'
        if yhat is not None:
            caption += f'\nPredicted: {yhat[idx]}'
        if caption:
            axis.set_xlabel(caption, fontsize=12)

        # Plot the entry as an image
        axis.imshow(np.atleast_2d(tensor.squeeze()), cmap='gray', vmin=lowest, vmax=highest)



weights_filter = model_cnn1.conv1.weight.data.cpu().numpy()

# (1, 1, 3, 3)


def visualize_filters(self, layer_name, **kwargs):
    """Plot the filters (weights) of a 2D convolutional layer of the model.

    One row of subplots is built per filter (output channel) and one column
    per input channel.

    NOTE(review): the try statement, the plotting call and the bodies of the
    last loop and of the except clause were missing from this block and have
    been reconstructed - confirm against the intended implementation.

    Args:
        layer_name: name of the attribute of ``self.model`` holding the layer.
        **kwargs: accepted so existing calls (e.g. ``cmap='gray'``) keep
            working; not used by this implementation.

    Returns:
        The figure, or None if the model has no such attribute or the layer
        is not an ``nn.Conv2d``.
    """
    try:
        # Get the layer object from the model
        layer = getattr(self.model, layer_name)

        # We are only looking at filters for 2D convolutions
        if isinstance(layer, nn.Conv2d):
            weights = layer.weight.data.cpu().numpy()
            # weights -> (out_channels (filter), in_channels, filter_H, filter_W)
            n_filters, n_in_channels, _, _ = weights.shape

            # Build a figure
            figsize = (2 * n_in_channels + 2, 2 * n_filters)
            fig, axs = plt.subplots(n_filters, n_in_channels, figsize=figsize, squeeze=False)
            axs_array = [[axs[i, j] for j in range(n_in_channels)] for i in range(n_filters)]

            # For each filter, plot its weights for every input channel
            for i in range(n_filters):
                _visualize_tensors(
                    axs_array[i],
                    weights[i],
                    layer_name=f'Filter #{i}',
                    title='Channel',  # NOTE(review): title text is an assumption
                )

            for ax in axs.flat:
                # Keep labels only on the outer subplots
                ax.label_outer()

            fig.tight_layout()
            return fig
    except AttributeError:
        # The model has no attribute with the given name
        return None

OK, let’s see what the filter looks like:

dummy_model = nn.Linear(1, 1)

dummy_list = []

def dummy_hook(layer, inputs, outputs):
    """Forward hook that simply records every call it receives.

    Appends the ``(layer, inputs, outputs)`` tuple to the module-level
    ``dummy_list``.
    """
    dummy_list.append((layer, inputs, outputs))


dummy_handle = dummy_model.register_forward_hook(dummy_hook)

# <torch.utils.hooks.RemovableHandle at 0x727670b65eb0>

Simple enough, right? Let’s see it in action:

dummy_x = torch.tensor([0.3])

# tensor([-0.8366], grad_fn=<ViewBackward0>)



# []



# tensor([-0.8366], grad_fn=<ViewBackward0>)



# [(Linear(in_features=1, out_features=1, bias=True),
#  (tensor([0.3000]),),
#  tensor([-0.8366], grad_fn=<ViewBackward0>))]




modules = list(sbs_cnn1.model.named_modules())


    (conv1): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
    (relu1): ReLU()
    (maxp1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (fc1): Linear(in_features=16, out_features=10, bias=True)
    (relu2): ReLU()
    (fc2): Linear(in_features=10, out_features=3, bias=True)
 ('conv1', Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))),
 ('relu1', ReLU()),
  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)),
 ('flatten', Flatten(start_dim=1, end_dim=-1)),
 ('fc1', Linear(in_features=16, out_features=10, bias=True)),
 ('relu2', ReLU()),
 ('fc2', Linear(in_features=10, out_features=3, bias=True))]


layer_names = {layer: name for name, layer in modules[1:]}


{Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1)): 'conv1',
 ReLU(): 'relu1',
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False): 'maxp1',
 Flatten(start_dim=1, end_dim=-1): 'flatten',
 Linear(in_features=16, out_features=10, bias=True): 'fc1',
 ReLU(): 'relu2',
 Linear(in_features=10, out_features=3, bias=True): 'fc2'}


visualization = {}

def hook_fn(layer, inputs, outputs):
    """Forward hook that stores a layer's outputs under the layer's name.

    The name is looked up in ``layer_names``; the outputs are detached, moved
    to the CPU and converted to a NumPy array before being saved in
    ``visualization``. A later call for the same layer overwrites the
    previously stored value.
    """
    name = layer_names[layer]
    visualization[name] = outputs.detach().cpu().numpy()


layers_to_hook = ['conv1', 'relu1', 'maxp1', 'flatten', 'fc1', 'relu2', 'fc2']

handles = {}

for name, layer in modules:
    if name in layers_to_hook:
        handles[name] = layer.register_forward_hook(hook_fn)


images_batch, labels_batch = next(iter(val_loader))
logits = sbs_cnn1.predict(images_batch)

Now, if everything went well, our visualization dictionary should contain one key for each layer we hooked a function to:


# dict_keys(['conv1', 'relu1', 'maxp1', 'flatten', 'fc1', 'relu2', 'fc2'])

Bingo! They are all there! But, before checking what’s stored inside it, let’s remove the hooks:

for handle in handles.values():
    # Each handle detaches the forward hook it was returned for
    handle.remove()



{'conv1': <torch.utils.hooks.RemovableHandle at 0x727671e38860>,
 'relu1': <torch.utils.hooks.RemovableHandle at 0x727671f16540>,
 'maxp1': <torch.utils.hooks.RemovableHandle at 0x727671f73320>,
 'flatten': <torch.utils.hooks.RemovableHandle at 0x727671f71940>,
 'fc1': <torch.utils.hooks.RemovableHandle at 0x727671f72870>,
 'relu2': <torch.utils.hooks.RemovableHandle at 0x727671f73fe0>,
 'fc2': <torch.utils.hooks.RemovableHandle at 0x727671f16e70>}



Update class StepByStep:

        self.visualization = {}
        self.handles = {}

    def attach_hooks(self, layers_to_hook, hook_fn=None):
        """Register a forward hook on each of the named layers of the model.

        The handles returned by the registrations are kept in
        ``self.handles`` (keyed by layer name) so they can be removed later.

        Args:
            layers_to_hook: collection of layer names, as reported by
                ``self.model.named_modules()``.
            hook_fn: optional hook with the ``(layer, inputs, outputs)``
                signature. If None, a default hook is used that stores each
                layer's outputs, as NumPy arrays, in ``self.visualization``.
        """
        # Clear any previous values
        self.visualization = {}

        # Create the dictionary to map layer objects to their names
        modules = list(self.model.named_modules())
        layer_names = {layer: name for name, layer in modules[1:]}

        if hook_fn is None:
            # Hook function to be attached to the forward pass
            def hook_fn(layer, inputs, outputs):
                # Get the layer name
                name = layer_names[layer]
                # Detach the outputs
                values = outputs.detach().cpu().numpy()
                # The hook may be called multiple times (for example, when
                # making predictions for several mini-batches): the first
                # call stores the values, later calls append to them
                if self.visualization[name] is None:
                    self.visualization[name] = values
                else:
                    self.visualization[name] = np.concatenate([self.visualization[name], values])

        for name, layer in modules:
            if name in layers_to_hook:
                # Initialize the corresponding key in the dictionary
                self.visualization[name] = None
                # Register the forward hook and keep the handle in another dict
                self.handles[name] = layer.register_forward_hook(hook_fn)

    def remove_hooks(self):
        """Remove every hook whose handle is stored in ``self.handles``."""
        for handle in self.handles.values():
            handle.remove()
        # Clear the dict, as all hooks have been removed
        self.handles = {}

The procedure is fairly straightforward now: Give it a list containing the names of the layers to attach hooks to, and you’re done!

sbs_cnn1.attach_hooks(layers_to_hook=['conv1', 'relu1', 'maxp1', 'flatten', 'fc1', 'relu2', 'fc2'])

To get the visualization attribute filled with values, we still need to make predictions:

images_batch, labels_batch = next(iter(val_loader))
logits = sbs_cnn1.predict(images_batch)

# (16, 3)



predicted = np.argmax(logits, axis=1)

# array([0, 0, 0, 2, 2, 1, 0, 2, 1, 2, 0, 1, 0, 2, 2, 1])


First, let’s visualize the first ten images sampled from the validation loader:

for layer, feature_map in sbs_cnn1.visualization.items():
    print(layer, feature_map.shape)


conv1 (16, 1, 8, 8)
relu1 (16, 1, 8, 8)
maxp1 (16, 1, 4, 4)
flatten (16, 16)
fc1 (16, 10)
relu2 (16, 10)
fc2 (16, 3)

To visualize the feature maps, we can add another method to our class: visualize_outputs(). This method simply retrieves the captured feature maps from the visualization dictionary and uses our _visualize_tensors() method to plot them:

    def visualize_outputs(self, layers, n_images=16, y=None, yhat=None):
        """Plot the outputs captured in ``self.visualization`` for some layers.

        Layers without an entry in ``self.visualization`` are skipped. Each
        column of subplots corresponds to one image. A layer with
        four-dimensional outputs gets one row per output channel; any other
        layer gets a single row.

        NOTE(review): the head of the plotting call and the bodies of the
        final loop were missing from this block and have been reconstructed -
        confirm against the intended implementation.

        Args:
            layers: names of the layers to plot.
            n_images: number of columns (images) to plot.
            y: optional labels, forwarded to ``_visualize_tensors``.
            yhat: optional predictions, forwarded to ``_visualize_tensors``.

        Returns:
            The figure containing the plots.
        """
        # Keep only the layers that have captured outputs
        layers = [layer for layer in layers if layer in self.visualization]
        shapes = [self.visualization[layer].shape for layer in layers]
        n_rows = [shape[1] if len(shape) == 4 else 1 for shape in shapes]  # number of output channels
        total_rows = np.sum(n_rows)

        fig, axs = plt.subplots(total_rows, n_images, figsize=(1.5 * n_images, 1.5 * total_rows), squeeze=False)
        axs_array = [[axs[i, j] for j in range(n_images)] for i in range(total_rows)]

        # Loop through the layers, one (or more) rows of subplots per layer
        row = 0
        for i, layer in enumerate(layers):
            start_row = row
            # Take the produced feature maps for that layer
            output = self.visualization[layer]

            is_vector = len(output.shape) == 2

            for j in range(n_rows[i]):
                _visualize_tensors(
                    axs_array[row],
                    output if is_vector else output[:, j].squeeze(),
                    y,
                    yhat,
                    layer_name=layers[i] if is_vector else f'{layers[i]}\nfil#{row - start_row}',
                    title='Image' if row == 0 else None,
                )
                row += 1

        for ax in axs.flat:
            # Keep labels only on the outer subplots
            ax.label_outer()

        plt.tight_layout()
        return fig

Then, let’s use the method above to plot the feature maps for the layers in the featurizer part of our model:

• For diagonals tilted to the left (images #3, #4, #7, #13 and #14), the filter seems to suppress the diagonal completely.
• For parallel lines (images #0, #1, #2, #6, #9, #10, and #12), the filter produces a striped pattern, brighter to the left/top of the original line, darker to its right/bottom.
• For diagonals tilted to the right (images #11 and #15), the filter produces a thicker line with multiple shades

Then, the ReLU activation function removes the negative values. Unfortunately, after this operation, image #9 (parallel horizontal lines) had all of its lines suppressed and seems indistinguishable from images #3, #4, #7, #13, and #14 (diagonals tilted to the left).

The classifier got 15 out of 16 right. It made a wrong prediction for image #9. Unsurprisingly, this is the image that got its horizontal line suppressed. The filter doesn’t seem to work so well whenever the horizontal line is too close to the top edge of the image.

def correct(self, x, y, threshold=.5):
    """Count, for each class, how many samples were classified correctly.

    Args:
        x: batch of inputs for ``self.model``.
        y: true labels (class indices) for the batch.
        threshold: cut-off applied to the probabilities in binary
            classification (a single output column); ignored otherwise.

    Returns:
        A tensor with one ``(n_correct, n_class)`` row per class.
    """
    # Only the predictions are needed, so there is no need to track gradients
    with torch.no_grad():
        yhat = self.model(x.to(self.device))
    y = y.to(self.device)
    # We get the number of classes from the predictions (only 1, if it is binary)
    _, n_dims = yhat.shape
    if n_dims > 1:
        # In a multiclass classification, the biggest logit always wins, so we don't bother getting probabilities
        # This is PyTorch's version of argmax, but it returns a tuple: (max value, index of max value)
        _, predicted = torch.max(yhat, 1)
    else:
        # A single output column still stands for two classes
        n_dims += 1
        # In binary classification, we NEED to check if the last layer is a sigmoid (and then it produces probs)
        if isinstance(self.model, nn.Sequential) and isinstance(self.model[-1], nn.Sigmoid):
            predicted = (yhat > threshold).long()
        # or something else (logits), which we need to convert using a sigmoid
        else:
            predicted = (torch.sigmoid(yhat) > threshold).long()
    # How many samples got classified correctly for each class
    result = []
    for c in range(n_dims):
        n_class = (y == c).sum().item()
        n_correct = (predicted[y == c] == c).sum().item()
        result.append((n_correct, n_class))

    return torch.tensor(result)

setattr(StepByStep, 'correct', correct)

If the predictions have two or more columns, it means we’re dealing with a multiclass classification: The predicted class is the one with the largest logit.

sbs_cnn1.correct(images_batch, labels_batch)


tensor([[6, 7],
        [4, 4],
        [5, 5]])

So, there is only 1 wrong prediction, for class #0 (parallel lines), corresponding to image #9, as we’ve already seen in the previous section.

def loader_apply(loader, func, reduce='sum'):
    """Apply ``func`` to every mini-batch of a loader and reduce the results.

    Args:
        loader: iterable yielding ``(x, y)`` pairs.
        func: callable taking ``(x, y)`` and returning a tensor; the tensors
            of all mini-batches are stacked along a new first dimension.
        reduce: ``'sum'`` adds the results up over the mini-batches,
            ``'mean'`` averages them (as floats); any other value returns
            the stacked results as they are.

    Returns:
        The reduced (or stacked) tensor.
    """
    per_batch = torch.stack([func(x, y) for x, y in loader], axis=0)

    if reduce == 'mean':
        return per_batch.float().mean(axis=0)
    if reduce == 'sum':
        return per_batch.sum(axis=0)
    # No reduction requested: one entry per mini-batch
    return per_batch

setattr(StepByStep, 'loader_apply', loader_apply)


StepByStep.loader_apply(sbs_cnn1.val_loader, sbs_cnn1.correct)


tensor([[47, 58],
        [57, 67],
        [75, 75]])


From the results above, we see that our model got 179 out of 200 images correctly classified in the validation set, an accuracy of 89.5%! Not bad :-)

Data Preparation

# Build tensors from numpy arrays BEFORE split
# Modify the scale of pixel values from [0, 255] to [0, 1]
x_tensor = torch.as_tensor(images / 255).float()
y_tensor = torch.as_tensor(labels).long()

# Use index_splitter to generate indices for training and validation sets
train_idx, val_idx = index_splitter(len(x_tensor), [80, 20])

# Use indices to perform the split
x_train_tensor = x_tensor[train_idx]
y_train_tensor = y_tensor[train_idx]
x_val_tensor = x_tensor[val_idx]
y_val_tensor = y_tensor[val_idx]

# We're not doing any data augmentation now
train_composer = Compose([Normalize(mean=(.5,), std=(.5,))])
val_composer = Compose([Normalize(mean=(.5,), std=(.5,))])

# Use custom dataset to apply composed transforms to each set
train_dataset = TransformedTensorDataset(x_train_tensor, y_train_tensor, transform=train_composer)
val_dataset = TransformedTensorDataset(x_val_tensor, y_val_tensor, transform=val_composer)

# Build a weighted random sampler to handle imbalanced classes
sampler = make_balanced_sampler(y_train_tensor)

# Use sampler in the training set to get a balanced data loader
train_loader = DataLoader(dataset=train_dataset, batch_size=16, sampler=sampler)
val_loader = DataLoader(dataset=val_dataset, batch_size=16)

Model Configuration


model_cnn1 = nn.Sequential()

# Featurizer
# Block 1: 1@10x10 -> n_channels@8x8 -> n_channels@4x4
n_channels = 1
model_cnn1.add_module('conv1', nn.Conv2d(in_channels=1, out_channels=n_channels, kernel_size=3))
model_cnn1.add_module('relu1', nn.ReLU())
model_cnn1.add_module('maxp1', nn.MaxPool2d(kernel_size=2))
# Flattening: n_channels * 4 * 4
model_cnn1.add_module('flatten', nn.Flatten())

# Classification
# Hidden Layer
model_cnn1.add_module('fc1', nn.Linear(in_features=n_channels*4*4, out_features=10))
model_cnn1.add_module('relu2', nn.ReLU())
# Output Layer
model_cnn1.add_module('fc2', nn.Linear(in_features=10, out_features=3))

lr = .1
ce_loss_fn = nn.CrossEntropyLoss(reduction='mean')
optimizer_cnn1 = optim.SGD(model_cnn1.parameters(), lr=lr)

Model Training

sbs_cnn1 = StepByStep(model_cnn1, ce_loss_fn, optimizer_cnn1)
sbs_cnn1.set_loaders(train_loader, val_loader)

Visualizing Filters

fig_filters = sbs_cnn1.visualize_filters('conv1', cmap='gray')

Capturing Outputs

featurizer_layers = ['conv1', 'relu1', 'maxp1', 'flatten']
classifier_layers = ['fc1', 'relu2', 'fc2']

sbs_cnn1.attach_hooks(layers_to_hook=featurizer_layers + classifier_layers)

images_batch, labels_batch = next(iter(val_loader))
logits = sbs_cnn1.predict(images_batch)
predicted = np.argmax(logits, 1)


Visualizing Feature Maps

fig_maps1 = sbs_cnn1.visualize_outputs(featurizer_layers)
fig_maps2 = sbs_cnn1.visualize_outputs(classifier_layers, y=labels_batch, yhat=predicted)

Evaluating Accuracy

StepByStep.loader_apply(sbs_cnn1.val_loader, sbs_cnn1.correct)


