Visualizing CNN features with heat maps

In this notebook, I use Keras to visualize image features in a convolutional neural network (CNN). This can be a useful tool to debug your models. If you are new to neural networks, this might also give you a better high-level understanding of how image classification works.

This is done in about 70 lines of code, including explanatory comments.

Since we only do forward passes, all of the code should run fine on a CPU (assuming your computer isn't too old).

All images were fetched from https://pixabay.com/.

In [168]:
from keras.models import Model
from keras.layers import Conv2D, Input
import numpy as np
import keras.backend as K
from PIL import Image
from IPython.display import display

# `tf` is used below but was never imported, which raises a NameError on a
# fresh kernel. Note that ConfigProto/Session are TensorFlow 1.x APIs.
import tensorflow as tf

# prevent OOM issues: allocate GPU memory on demand instead of reserving it
# all up front, and allow ops to fall back to another device when they
# cannot be placed on the requested one
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# make Keras run on the session configured above
K.set_session(sess)
In [169]:
def get_activations(i, model):
    """Build a backend function that computes the output of layer ``i``.

    Note that this returns a *callable*, not the activations themselves:
    call the result with a list holding one input batch to obtain a list
    holding the activations of ``model.layers[i]``.

    Args:
        i: index into ``model.layers`` (negative indices count from the end).
        model: the Keras model to probe.

    Returns:
        The object produced by ``K.function`` mapping the input of the
        model's first layer to the output of layer ``i``.
    """
    first_layer_input = model.layers[0].input
    probed_output = model.layers[i].output
    return K.function([first_layer_input], [probed_output])
In [170]:
# so we can disable all prints at once
# but still get detailed info if we want to debug
def dprint(*args, debug):
    """Print ``args`` only when ``debug`` is truthy.

    Args:
        *args: values to print, separated by spaces like a normal ``print``.
        debug: keyword-only switch; nothing is printed when it is falsy.
    """
    if debug:
        # unpack the values; print(args) would show the tuple's repr instead
        print(*args)
In [171]:
# shows the activation heat map of the input image
def show_heatmap(inp_img, model, index, alpha=0.7, debug=False):
    """Classify an image and display it blended with an activation heat map.

    The activations of layer ``index`` are averaged over the filter axis,
    scaled into [0, 1], tinted pink, resized to the image size and blended
    on top of the image.

    Args:
        inp_img: a PIL image already sized to the model's input; it is
            converted to RGB, so grayscale or RGBA inputs are accepted.
        model: Keras model used both for the prediction and the activations.
        index: index of the layer in ``model.layers`` whose activations are
            visualized.
        alpha: blend factor passed to ``Image.blend``; 0 shows only the
            image, 1 shows only the heat map.
        debug: when true, print intermediate shapes and value ranges.

    Returns:
        None. The top 3 predictions are printed and the blended image is
        displayed.
    """
    # force 3 channels so both the array layout and the blend below are valid
    rgb_img = inp_img.convert("RGB")

    # convert the image into a batch of one; a PIL image becomes a
    # (height, width, 3) array, so add the batch axis instead of reshaping
    # with width and height swapped (which scrambles non-square images)
    # NOTE(review): raw pixel values are fed to the model here; pretrained
    # keras.applications models usually expect their preprocess_input — confirm
    inp_arr = np.asarray(rgb_img)[np.newaxis, ...]

    # predict the class of the image and print the top 3 predictions
    pred = model.predict([inp_arr])
    print([(label, conf) for _, label, conf in decode_predictions(pred)[0][:3]])

    # fetch the activations of layer index (first output, first batch item)
    out = get_activations(index, model)([inp_arr])[0][0]
    dprint("activations", out.shape, debug=debug)

    # for each region of the activation map, calculate the average filter activations
    out_avg = np.mean(out, -1)
    dprint("post avg", out_avg.shape, debug=debug)

    # repeat the array into 3 dimensions
    out_avg = np.repeat(out_avg[:, :, np.newaxis], 3, axis=2)
    dprint("post repeat", out_avg.shape, debug=debug)

    # normalize the values into the range [0,1]
    dprint("pre normalize", np.amin(out_avg), np.amax(out_avg), debug=debug)
    peak = np.amax(out_avg)
    if peak > 0:
        # skip the division for an all-zero map to avoid 0/0 -> NaN
        out_avg = out_avg / peak
    # negative activations would wrap around in the uint8 cast below
    out_avg = np.clip(out_avg, 0.0, 1.0)
    dprint("post normalize", np.amin(out_avg), np.amax(out_avg), debug=debug)

    # transform the values into RGB range with a pink tint
    out_avg *= (255, 0, 128)
    dprint("post denormalize", np.amin(out_avg), np.amax(out_avg), debug=debug)

    # convert the average activations into an image and resize it to the input shape
    heatmap = Image.fromarray(np.uint8(out_avg))
    heatmap = heatmap.resize((rgb_img.width, rgb_img.height), Image.BICUBIC)

    # superimpose the heatmap on top of the input image
    input_heatmap = Image.blend(rgb_img, heatmap, alpha)

    # show the result
    display(input_heatmap)
In [172]:
# use VGG19 with pretrained ImageNet weights
# decode_predictions is imported here but used inside show_heatmap, so this
# cell must be run before show_heatmap is called
from keras.applications.vgg19 import VGG19, decode_predictions
# build the model with the constructor's default arguments
vgg = VGG19()
In [173]:
# Visualize the output of the last MaxPool layer, addressed relative to
# the end of the model's layer list
activations_index = -5
# guard against the index silently pointing at a different layer
assert vgg.layers[activations_index].name == "block5_pool"
# print the architecture so the chosen layer can be located by eye
vgg.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_59 (InputLayer)        (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv4 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv4 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv4 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
predictions (Dense)          (None, 1000)              4097000   
=================================================================
Total params: 143,667,240
Trainable params: 143,667,240
Non-trainable params: 0
_________________________________________________________________
In [174]:
from glob import glob

def load_images(path, img_w=224, img_h=224):
    """Lazily load and resize every image matching a glob pattern.

    Args:
        path: glob pattern selecting the image files, e.g. ``'img/*jpg'``.
        img_w: target width in pixels.
        img_h: target height in pixels.

    Yields:
        One resized PIL image per matching file, in sorted path order.
    """
    # glob returns files in an OS-dependent order; sort so that images line
    # up with the printed predictions identically on every run
    for p in sorted(glob(path)):
        # resize returns an independent image, so the source file can be
        # closed before the result is handed to the caller
        with Image.open(p) as inp_img:
            resized = inp_img.resize((img_w, img_h))
        yield resized
In [175]:
for img in load_images('img/*jpg'):
    show_heatmap(img, vgg, index=activations_index, alpha=0.8, debug=False)
[('missile', 0.58071667), ('space_shuttle', 0.23233528), ('projectile', 0.13431515)]
[('volcano', 0.92195445), ('conch', 0.052368388), ('alp', 0.010560113)]
[('wood_rabbit', 0.5641848), ('hare', 0.37138048), ('coyote', 0.018360214)]
[('dalmatian', 0.96605843), ('whippet', 0.015291788), ('German_short-haired_pointer', 0.006996258)]
[('military_uniform', 0.31457028), ('rifle', 0.15194458), ('assault_rifle', 0.087423146)]
[('balloon', 0.99978393), ('parachute', 0.00020682462), ('maraca', 5.5320324e-06)]
[('golfcart', 0.5762165), ('racer', 0.38245296), ('sports_car', 0.011333555)]
[('African_elephant', 0.93167067), ('tusker', 0.042763557), ('Indian_elephant', 0.022968665)]
[('library', 0.9831129), ('bookcase', 0.011910071), ('bookshop', 0.0047296844)]