
Moving beyond Labels: Finetuning CNNs on BOLD response#

By Neuromatch Academy

Content creators: Aakash Agrawal

Production editors: Spiros Chavlis


Finetuning CNNs using a regression loss#

  • CNNs have proven to be good models of the visual cortex, but the goal of the visual cortex is not limited to image classification.

  • Typically, to model visual cortex responses with CNNs, we:

    1. Extract features from intermediate layers,

    2. Reduce the dimensionality of those features with techniques like PCA, and

    3. Perform regression from the reduced features to the neural data.

  • This approach fails to explain all of the variance in the data.

A better approach is to train the CNN directly on the neural responses. For contrast, the classical pipeline is sketched below.
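The sketch uses random arrays as stand-ins for real layer activations and voxel responses; the PCA size and Ridge regression are illustrative choices, not prescribed by this tutorial:

# Classical encoding-model pipeline (illustrative sketch with fake data)
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
features = rng.normal(size=(1750, 4096))   # stand-in: CNN layer activations
responses = rng.normal(size=(1750, 50))    # stand-in: z-scored voxel responses

# Steps 1-2: extract features (here, fake) and reduce dimensionality with PCA
features_pca = PCA(n_components=100).fit_transform(features)

# Step 3: linear regression from reduced features to voxel responses
model = Ridge(alpha=1.0).fit(features_pca, responses)
predicted = model.predict(features_pca)
print(predicted.shape)  # (1750, 50)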


Setup#

Install dependencies#

# @title Install dependencies
!pip install Pillow --quiet
!pip install torch_intermediate_layer_getter --quiet
  Building wheel for torch-intermediate-layer-getter (setup.py) ... done
#  Imports
import copy

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch_intermediate_layer_getter import IntermediateLayerGetter as MidGetter

from torchvision import transforms
import torchvision.models as models


from scipy.spatial.distance import pdist
from scipy.stats import pearsonr

Downloading Kay dataset#

# @title Downloading Kay dataset
import requests

fnames = ["kay_labels.npy", "kay_labels_val.npy", "kay_images.npz"]

urls =['https://osf.io/r638s/download',
       'https://osf.io/yqb3e/download',
       'https://osf.io/ymnjv/download']

for i, url in enumerate(urls):
  r = requests.get(url, allow_redirects=True)
  with open(fnames[i], 'wb') as fh:
    fh.write(r.content)


with np.load(fnames[2]) as dobj:
  dat = dict(**dobj)
labels = np.load('kay_labels.npy')
val_labels = np.load('kay_labels_val.npy')

Visualizing example images#

# @title Visualizing example images
f, axs = plt.subplots(2, 4, figsize=(12, 6), sharex=True, sharey=True)
for ax, im in zip(axs.flat, dat["stimuli"]):
  ax.imshow(im, cmap="gray")
f.tight_layout()
plt.show()
[Figure: a 2 × 4 grid of example grayscale stimulus images from the Kay dataset]

Dataset Structure#

dat has the following fields:

  • stimuli: stim × i × j array of grayscale stimulus images

  • stimuli_test: stim × i × j array of grayscale stimulus images in the test set

  • responses: stim × voxel array of z-scored BOLD response amplitudes

  • responses_test: stim × voxel array of z-scored BOLD response amplitudes in the test set

  • roi: array of voxel labels

  • roi_names: array of names corresponding to voxel labels
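A quick way to confirm these fields and their shapes after loading (a small check, not part of the original notebook):

# Print each field of the Kay dataset and its shape
for key, val in dat.items():
  print(key, val.shape)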

# Convert stimuli to RGB and rescale to 0-255 (specific to the Kay dataset images)
stimuli_tr = dat["stimuli"]
stimuli_ts = dat["stimuli_test"]
stimuli_tr_xformed = np.zeros((len(stimuli_tr), 3, 128, 128))
stimuli_ts_xformed = np.zeros((len(stimuli_ts), 3, 128, 128))
for i in range(len(stimuli_tr)):
  img = stimuli_tr[i, :, :]
  img = ((img - np.min(img)) * 255 / (np.max(img) - np.min(img))).astype(int)
  stimuli_tr_xformed[i, :, :, :] = [img, img, img]  # replicate across 3 channels

for i in range(len(stimuli_ts)):
  img = stimuli_ts[i, :, :]
  img = ((img - np.min(img)) * 255 / (np.max(img) - np.min(img))).astype(int)
  stimuli_ts_xformed[i, :, :, :] = [img, img, img]  # replicate across 3 channels
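The two loops above can also be collapsed into a single vectorized helper; the following is an equivalent sketch (to_rgb_scaled is a hypothetical name introduced here, not part of the original):

# Vectorized equivalent of the per-image loops above (hypothetical helper)
def to_rgb_scaled(stims):
  mins = stims.min(axis=(1, 2), keepdims=True)
  maxs = stims.max(axis=(1, 2), keepdims=True)
  scaled = ((stims - mins) * 255 / (maxs - mins)).astype(int)
  return np.repeat(scaled[:, np.newaxis, :, :], 3, axis=1)  # replicate channels

# e.g., stimuli_tr_xformed = to_rgb_scaled(stimuli_tr)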

Finetuning AlexNet on voxel activations#

Setting up training and test data for LOC region#

# @title Setting up training and test data for LOC region
loc_id = np.where(dat['roi'] == 7)
response_tr = np.squeeze(dat["responses"][:, loc_id])
response_ts = np.squeeze(dat["responses_test"][:, loc_id])
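The label 7 is used here to select LOC voxels; the integer codes in dat['roi'] index into dat['roi_names'], so a quick printout (a small check added here) shows the mapping and voxel counts:

# Show which region name each integer label corresponds to, with voxel counts
for idx, name in enumerate(dat["roi_names"]):
  print(idx, name, int(np.sum(dat["roi"] == idx)), "voxels")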

Custom dataloader for loading images in numpy array#

# @title Custom dataloader for loading images in numpy array
class MyDataset(Dataset):
  def __init__(self, data, targets, transform=None):
    self.data = data
    # Regression targets are z-scored BOLD amplitudes: keep them as floats
    # (casting to LongTensor would truncate them to integers)
    self.targets = torch.FloatTensor(np.array(targets))
    self.transform = transform

  def __getitem__(self, index):
    x = self.data[index]
    y = self.targets[index]

    if self.transform:
      # Convert the (C, H, W) numpy image to PIL for the torchvision transforms
      x = Image.fromarray(self.data[index].astype(np.uint8).transpose(1, 2, 0))
      x = self.transform(x)

    return x, y

  def __len__(self):
    return len(self.data)


transform = {
    'train': transforms.Compose([
                                 transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406],
                                                      [0.229, 0.224, 0.225])
                                 ]),
    'val': transforms.Compose([
                               transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize([0.485, 0.456, 0.406],
                                                    [0.229, 0.224, 0.225])
                               ]),
             }

dataset = {}
dataset['train'] = MyDataset(list(stimuli_tr_xformed),
                             list(response_tr), transform=transform['train'])
dataset['val'] = MyDataset(list(stimuli_ts_xformed),
                           list(response_ts), transform=transform['val'])
dataset_sizes = {x: len(dataset[x]) for x in ['train', 'val']}

dataloaders = {x: torch.utils.data.DataLoader(dataset[x], batch_size=50) for x in ['train', 'val']}
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
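Before training, it helps to pull a single batch and verify the shapes the network will receive (a minimal check added here; the exact voxel count depends on the LOC selection above):

# Sanity-check one batch from the training loader
xb, yb = next(iter(dataloaders['train']))
print(xb.shape)  # expected: torch.Size([50, 3, 224, 224])
print(yb.shape)  # expected: torch.Size([50, n_LOC_voxels])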
# Training
net = models.alexnet(pretrained=True)
num_ftrs = net.classifier[6].in_features
net.classifier[6] = nn.Linear(num_ftrs, np.shape(response_ts)[1])

net.to(device)
criterion = nn.MSELoss()
learning_rate = 0.1  # Change this
num_epochs = 5  # Change this
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

best_model_wts = copy.deepcopy(net.state_dict())
best_loss = np.inf  # initialize above any achievable loss

for epoch in range(num_epochs):
	print(f"Epoch {epoch}/{num_epochs - 1}")
	print('-' * 20)

	# Each epoch has a training and validation phase
	for phase in ['train', 'val']:
		if phase == 'train':
			net.train()  # Set model to training mode
		else:
			net.eval()   # Set model to evaluate mode

		running_loss = 0.0

		# Iterate over data.
		for inputs, labels in dataloaders[phase]:
			inputs = inputs.to(device)
			labels = labels.to(device)

			# zero the parameter gradients
			optimizer.zero_grad()

			# forward
			# track history only if in train
			with torch.set_grad_enabled(phase == 'train'):
				outputs = net(inputs)
				loss = criterion(outputs.float(), labels.float())

				# backward + optimize only if in training phase
				if phase == 'train':
					loss.backward()
					optimizer.step()

			# statistics
			running_loss += loss.item() * inputs.size(0)

		epoch_loss = running_loss / dataset_sizes[phase]
		print(f"{phase} Loss: {epoch_loss:.4f}")

		# deep copy the model
		if phase == 'val' and epoch_loss < best_loss:
			best_loss = epoch_loss
			best_model_wts = copy.deepcopy(net.state_dict())

	print()

# load best model weights
net.load_state_dict(best_model_wts)
Epoch 0/4
--------------------
train Loss: 0.4805
val Loss: 0.0503

Epoch 1/4
--------------------
train Loss: 0.4680
val Loss: 0.0503

Epoch 2/4
--------------------
train Loss: 0.4679
val Loss: 0.0501

Epoch 3/4
--------------------
train Loss: 0.4677
val Loss: 0.0500

Epoch 4/4
--------------------
train Loss: 0.4677
val Loss: 0.0499
<All keys matched successfully>
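Since an MSE value is hard to interpret on its own, one optional way to evaluate the finetuned model (an addition, not part of the original tutorial) is to correlate predicted and observed responses voxel by voxel on the validation set:

# Voxel-wise correlation between predicted and observed validation responses
net.eval()
preds, obs = [], []
with torch.no_grad():
  for inputs, targets in dataloaders['val']:
    preds.append(net(inputs.to(device)).cpu().numpy())
    obs.append(targets.numpy())
preds, obs = np.vstack(preds), np.vstack(obs)
voxel_r = [pearsonr(preds[:, v], obs[:, v])[0] for v in range(preds.shape[1])]
print(f"Median voxel-wise correlation: {np.median(voxel_r):.3f}")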
Extract features of all the intermediate layers from ImageNet-trained and finetuned AlexNet#
return_layers = {
    'features.2': 'conv1',
    'features.5': 'conv2',
    'features.7': 'conv3',
    'features.9': 'conv4',
    'features.12': 'conv5',
    'classifier.1': 'fc1',
    'classifier.4': 'fc2',
    'classifier.6': 'fc3',
    }

# Loading AlexNet pretrained on ImageNet
net_im = models.alexnet(pretrained=True)
net_im.eval()
net_im.to(device)


# Setting up feature extraction step
midfeat_ft = MidGetter(net, return_layers=return_layers, keep_output=True)
midfeat_im = MidGetter(net_im, return_layers=return_layers, keep_output=True)

# Loading validation data and a forward pass through both networks
net.eval()  # make sure the finetuned network is also in eval mode
dataloaders = {x: torch.utils.data.DataLoader(dataset[x], batch_size=120) for x in ['val']}
with torch.no_grad():  # gradients are not needed for feature extraction
  for inputs, labels in dataloaders['val']:
    inputs = inputs.to(device)
    mid_outputs_ft, _ = midfeat_ft(inputs)
    mid_outputs_im, _ = midfeat_im(inputs)
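To confirm that return_layers picked up the intended modules, a quick check (added here) is to print the shape of each extracted activation:

# Inspect the shape of each intermediate-layer activation
for layer, act in mid_outputs_ft.items():
  print(layer, tuple(act.shape))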

Dissimilarity - Correlation#

# @title Dissimilarity - Correlation
# Loading V1 and LOC responses
v1_id = np.where(dat['roi'] == 1)
loc_id = np.where(dat['roi'] == 7)
Rts_v1 = np.squeeze(dat["responses_test"][:, v1_id])
Rts_lo = np.squeeze(dat["responses_test"][:, loc_id])

# Observed dissimilarity  - Correlation
fMRI_dist_metric_ft = "euclidean"  # ['correlation', 'euclidean']
fMRI_dist_metric_im = "correlation"  # ['correlation', 'euclidean']

Alexnet_ft_dist_metric = "euclidean"  # ['correlation', 'euclidean']
Alexnet_im_dist_metric = "correlation"  # ['correlation', 'euclidean']

dobs_v1_ft = pdist(Rts_v1, fMRI_dist_metric_ft)
dobs_lo_ft = pdist(Rts_lo, fMRI_dist_metric_ft)
dobs_v1_im = pdist(Rts_v1, fMRI_dist_metric_im)
dobs_lo_im = pdist(Rts_lo, fMRI_dist_metric_im)

# Comparing representation of V1 and LOC across different layers of Alexnet
r, p = np.zeros((4, 8)), np.zeros((4, 8))
for i, l in enumerate(mid_outputs_ft.keys()):
  dnet_ft = pdist(torch.flatten(mid_outputs_ft[l], 1, -1).cpu().detach().numpy(),
                  Alexnet_ft_dist_metric)
  dnet_im = pdist(torch.flatten(mid_outputs_im[l], 1, -1).cpu().detach().numpy(),
                  Alexnet_im_dist_metric)
  r[0, i], p[0, i] = pearsonr(dnet_ft, dobs_v1_ft)
  r[1, i], p[1, i] = pearsonr(dnet_im, dobs_v1_im)
  r[2, i], p[2, i] = pearsonr(dnet_ft, dobs_lo_ft)
  r[3, i], p[3, i] = pearsonr(dnet_im, dobs_lo_im)
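Note that pdist returns a condensed vector of pairwise distances rather than a square matrix: with 120 test stimuli there are 120 × 119 / 2 = 7140 pairs. A quick check of the shapes:

# Condensed dissimilarity vectors: one entry per stimulus pair
print(dobs_v1_ft.shape)  # (7140,)
print(dnet_ft.shape)     # (7140,), for the last layer in the loop above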

Plotting correlation between observed and predicted dissimilarity values#

# @title Plotting correlation between observed and predicted dissimilarity values
plt.bar(range(8), r[0, :], alpha=0.5)
plt.bar(range(8), r[1, :], alpha=0.5)
plt.legend(['Fine Tuned', 'Imagenet'])
plt.ylabel('Correlation coefficient')
plt.title('Match to V1')
plt.xticks(range(8), mid_outputs_ft.keys())
plt.show()

plt.figure()
plt.bar(range(8), r[2, :], alpha=0.5)
plt.bar(range(8), r[3, :], alpha=0.5)
plt.legend(['Fine Tuned', 'Imagenet'])
plt.ylabel('Correlation coefficient')
plt.title('Match to LOC')
plt.xticks(range(8), mid_outputs_ft.keys())
plt.show()
[Figures: bar plots of correlation coefficients for each AlexNet layer, finetuned vs. ImageNet-trained, titled 'Match to V1' and 'Match to LOC']