{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "execution": {}, "id": "view-in-github" }, "source": [ "\"Open   \"Open" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "# Tutorial 3: Deep linear neural networks\n", "**Week 1, Day 2: Linear Deep Learning**\n", "\n", "**By Neuromatch Academy**\n", "\n", "__Content creators:__ Saeed Salehi, Spiros Chavlis, Andrew Saxe\n", "\n", "__Content reviewers:__ Polina Turishcheva, Antoine De Comite\n", "\n", "__Content editors:__ Anoop Kulkarni\n", "\n", "__Production editors:__ Khalid Almubarak, Gagana B, Spiros Chavlis" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "---\n", "# Tutorial Objectives\n", "\n", "* Deep linear neural networks\n", "* Learning dynamics and singular value decomposition\n", "* Representational Similarity Analysis\n", "* Illusory correlations & ethics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "remove-input" ] }, "outputs": [], "source": [ "# @markdown\n", "from IPython.display import IFrame\n", "from ipywidgets import widgets\n", "out = widgets.Output()\n", "with out:\n", " print(f\"If you want to download the slides: https://osf.io/download/bncr8/\")\n", " display(IFrame(src=f\"https://mfr.ca-1.osf.io/render?url=https://osf.io/bncr8/?direct%26mode=render%26action=download%26mode=render\", width=730, height=410))\n", "display(out)" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "---\n", "# Setup\n", "\n", "This a GPU-Free tutorial!" 
def content_review(notebook_section: str):
    """
    Build the feedback widget for one notebook section.

    Args:
      notebook_section: str
        Identifier of the section the feedback refers to

    Returns:
      Whatever `DatatopsContentReviewContainer.render()` returns
    """
    datatops_settings = {
        "url": "https://pmyvdlilci.execute-api.us-east-1.amazonaws.com/klab",
        "name": "neuromatch_dl",
        "user_key": "f379rz8y",
    }
    # The first positional argument is the text prompt; it is left empty here
    container = DatatopsContentReviewContainer("", notebook_section, datatops_settings)
    return container.render()
def plot_x_y_hier_data(im1, im2, subplot_ratio=[1, 2]):
    """
    Show two images side by side: `im1` titled as the labels of all
    samples and `im2` titled as the features of all samples.

    Args:
      im1: np.ndarray
        Image drawn in the left panel ("Labels of all samples")
      im2: np.ndarray
        Image drawn in the right panel ("Features of all samples")
      subplot_ratio: list
        Width ratios of the left and right panels

    Returns:
      Nothing
    """
    fig = plt.figure(figsize=(12, 5))
    grid = gridspec.GridSpec(1, 2, width_ratios=subplot_ratio)
    panels = (
        (grid[0], im1, "Labels of all samples"),
        (grid[1], im2, "Features of all samples"),
    )
    # Both panels get the same treatment: image, title, hidden axes
    for cell, image, title in panels:
        axis = plt.subplot(cell)
        axis.imshow(image, cmap="cool")
        axis.set_title(title)
        axis.set_axis_off()
    plt.tight_layout()
    plt.show()
None):\n", " \"\"\"\n", " Plot tree data\n", "\n", " Args:\n", " label_list: np.ndarray\n", " List of labels [default: None]\n", " feature_array: np.ndarray\n", " List of features [default: None]\n", " new_feature: string\n", " Enables addition of new features\n", "\n", " Returns:\n", " Nothing\n", " \"\"\"\n", " cmap = matplotlib.colors.ListedColormap(['cyan', 'magenta'])\n", " n_features = 10\n", " n_labels = 8\n", " im1 = np.eye(n_labels)\n", " if feature_array is None:\n", " im2 = np.array([[1, 1, 1, 1, 1, 1, 1, 1],\n", " [0, 0, 0, 0, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 1, 1, 1, 1],\n", " [1, 1, 1, 1, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 0, 0, 1, 1],\n", " [0, 0, 1, 1, 0, 0, 0, 0],\n", " [1, 1, 0, 0, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 1, 1, 0, 0],\n", " [0, 1, 1, 1, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 1, 1, 0, 1]]).T\n", " im2[im2 == 0] = -1\n", " feature_list = ['can_grow',\n", " 'is_mammal',\n", " 'has_leaves',\n", " 'can_move',\n", " 'has_trunk',\n", " 'can_fly',\n", " 'can_swim',\n", " 'has_stem',\n", " 'is_warmblooded',\n", " 'can_flower']\n", " else:\n", " im2 = feature_array\n", " if label_list is None:\n", " label_list = ['Goldfish', 'Tuna', 'Robin', 'Canary',\n", " 'Rose', 'Daisy', 'Pine', 'Oak']\n", " fig = plt.figure(figsize=(12, 7))\n", " gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1.35])\n", " ax1 = plt.subplot(gs[0])\n", " ax2 = plt.subplot(gs[1])\n", " ax1.imshow(im1, cmap=cmap)\n", " if feature_array is None:\n", " implt = ax2.imshow(im2, cmap=cmap, vmin=-1.0, vmax=1.0)\n", " else:\n", " implt = ax2.imshow(im2[:, -n_features:], cmap=cmap, vmin=-1.0, vmax=1.0)\n", " divider = make_axes_locatable(ax2)\n", " cax = divider.append_axes(\"right\", size=\"5%\", pad=0.1)\n", " cbar = plt.colorbar(implt, cax=cax, ticks=[-0.5, 0.5])\n", " cbar.ax.set_yticklabels(['no', 'yes'])\n", " ax1.set_title(\"Labels\")\n", " ax1.set_yticks(ticks=np.arange(n_labels))\n", " ax1.set_yticklabels(labels=label_list)\n", " ax1.set_xticks(ticks=np.arange(n_labels))\n", " 
def plot_loss_sv(loss_array, sv_array):
    """
    Plot the training loss and the evolution of the singular values
    in two stacked panels that share the epoch axis.

    Args:
      loss_array: np.ndarray
        Log of MSE loss per epoch
      sv_array: np.ndarray
        Log of singular values/modes across epochs; one column per
        singular value (the column count is read from `shape[1]`)

    Returns:
      Nothing
    """
    n_sing_values = sv_array.shape[1]
    # Normalize by the global maximum so the largest value plotted is 1
    sv_array = sv_array / np.max(sv_array)
    # `plt.cm.get_cmap` was removed from Matplotlib (3.9);
    # `plt.get_cmap` accepts the same (name, lut) arguments.
    cmap = plt.get_cmap("Set1", n_sing_values)

    _, (plot1, plot2) = plt.subplots(2, 1, sharex=True, figsize=(10, 10))
    plot1.set_title("Training loss (Mean Squared Error)")
    plot1.plot(loss_array, color='r')

    plot2.set_title("Evolution of singular values (modes)")
    for i in range(n_sing_values):
        plot2.plot(sv_array[:, i], c=cmap(i))
    plot2.set_xlabel("Epoch")
    plt.show()
def plot_ills_sv_twin(ill_array, sv_array, ill_label):
    """
    Plot the illusory-correlation trace (left y-axis) together with the
    evolution of the singular values (right y-axis) over epochs.

    Args:
      ill_array: np.ndarray
        Log of illusory correlations per epoch
      sv_array: np.ndarray
        Log of singular values/modes across epochs; one column per
        singular value (the column count is read from `shape[1]`)
      ill_label: string
        Text used as the label of the left y-axis

    Returns:
      Nothing
    """
    n_sing_values = sv_array.shape[1]
    # Normalize by the global maximum so the largest value plotted is 1
    sv_array = sv_array / np.max(sv_array)
    # `plt.cm.get_cmap` was removed from Matplotlib (3.9);
    # `plt.get_cmap` accepts the same (name, lut) arguments.
    cmap = plt.get_cmap("winter", n_sing_values)

    fig = plt.figure(figsize=(10, 5))
    ax1 = plt.gca()
    ax1.set_title("Network training and the Illusory Correlations")
    ax1.set_xlabel("Epoch")
    ax1.set_ylabel(ill_label, c='r')
    ax1.tick_params(axis='y', labelcolor='r')
    ax1.plot(ill_array, color='r', linewidth=3)
    ax1.set_ylim(-1.05, 1.05)

    # Second y-axis sharing the same epoch axis
    ax2 = ax1.twinx()
    ax2.set_ylabel("Singular values (modes)", c='b')
    ax2.tick_params(axis='y', labelcolor='b')
    for i in range(n_sing_values):
        ax2.plot(sv_array[:, i], c=cmap(i))

    fig.tight_layout()
    plt.show()
def build_tree(n_levels, n_branches, probability,
               to_np_array=True):
    """
    Describe a full tree as three parallel per-node sequences.

    Nodes are listed level by level starting from the root (level 0);
    level `i` contributes `n_branches**i` nodes.

    Args:
      n_levels: int
        Number of levels below the root
      n_branches: int
        Number of children per node
      probability: float
        Flipping probability stored for every node; must lie in [0, 1]
      to_np_array: boolean
        If true, each sequence is converted to an np.ndarray

    Returns:
      tree: dict
        Keys "level", "pflip" and "parent"; values are lists when
        to_np_array=False and np.ndarrays otherwise. The root's parent
        is None.
    """
    assert 0.0 <= probability <= 1.0

    levels = [0]
    for depth in range(1, n_levels + 1):
        levels += [depth] * (n_branches ** depth)
    n_nodes = len(levels)

    flips = [probability] * n_nodes

    # Every non-root node is assigned its parent's index, n_branches at a time
    parents = [None]
    for node in range((n_nodes - 1) // n_branches):
        parents += [node] * n_branches

    tree = {"level": levels, "pflip": flips, "parent": parents}
    if to_np_array:
        tree = {key: np.array(values) for key, values in tree.items()}
    return tree
def add_feature(existing_features, new_feature):
    """
    Append one feature as a new last column of a feature matrix.

    Args:
      existing_features: np.ndarray
        2D feature matrix with one row per item
      new_feature: list
        Values of the new feature, one entry per row of
        `existing_features`

    Returns:
      np.ndarray
        A new array made of `existing_features` with `new_feature`
        stacked on as the last column
    """
    assert isinstance(existing_features, np.ndarray)
    assert isinstance(new_feature, list)
    # Wrap and transpose so the flat list becomes a single column
    new_column = np.array([new_feature]).T
    # Stack onto the argument that was passed in, not onto a notebook global
    return np.hstack((existing_features, new_column))
def test_net_svd_ex(seed):
    """
    Check the `ex_net_svd` exercise against the reference `net_svd`
    on a freshly seeded `LNNet(8, 30, 100)` and print the verdict.

    Args:
      seed: int
        Set for reproducibility

    Returns:
      Nothing
    """
    torch.manual_seed(seed)
    model = LNNet(8, 30, 100)
    try:
        U_ex, S_ex, V_ex = ex_net_svd(model, 8)
        U, S, V = net_svd(model, 8)
        if (torch.all(torch.isclose(U_ex.detach(), U.detach(), atol=1e-6)) and
                torch.all(torch.isclose(S_ex.detach(), S.detach(), atol=1e-6)) and
                torch.all(torch.isclose(V_ex.detach(), V.detach(), atol=1e-6))):
            print("Well done! Seems to be correct!")
        else:
            print("Please double check your implementation!")
    except Exception as err:
        # Catch only ordinary errors (a bare `except:` would also trap
        # KeyboardInterrupt/SystemExit) and show what actually went wrong.
        print("Faulty Implementation!")
        print(f"{type(err).__name__}: {err}")
def set_seed(seed=None, seed_torch=True):
    """
    Seed Python's `random`, NumPy and (optionally) PyTorch with one value.
    The `random`, `np` and `torch` modules must be imported.

    Args:
      seed : Integer
        A non-negative integer that defines the random state. When `None`
        (the default) a seed in [0, 2**32) is drawn from NumPy's global RNG.
      seed_torch : Boolean
        If `True` also seeds PyTorch (CPU and CUDA) and switches cuDNN to
        deterministic, non-benchmark mode. Default is `True`.

    Returns:
      Nothing.
    """
    if seed is None:
        # Ask for a 64-bit draw explicitly so the upper bound 2**32 does not
        # depend on the platform's default integer width, and hand a plain
        # Python int to the seeding functions below.
        seed = int(np.random.randint(0, 2 ** 32, dtype=np.int64))
    random.seed(seed)
    np.random.seed(seed)
    if seed_torch:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    print(f'Random seed {seed} has been set.')
\\n\"\n", " \"If you want to enable it, in the menu under `Runtime` -> \\n\"\n", " \"`Hardware accelerator.` and select `GPU` from the dropdown menu\")\n", " else:\n", " print(\"GPU is enabled in this notebook. \\n\"\n", " \"If you want to disable it, in the menu under `Runtime` -> \\n\"\n", " \"`Hardware accelerator.` and select `None` from the dropdown menu\")\n", "\n", " return device" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "SEED = 2021\n", "set_seed(seed=SEED)\n", "DEVICE = set_device()" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "This colab notebook is GPU free!" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "---\n", "# Section 0: Prelude\n", "*Time estimate: ~10 mins*\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "**A note on the exercises**: Most of the exercises are marked `Optional(Bonus)` and should only read through them if you are in a tight timeline. Therefore we would not rely on the implementation of the exercises. If necessary, you can look at the *Helper Functions* cell above to find the functions and classes used in this tutorial.\n", "\n", "Throughout this tutorial, we will use a linear neural net with a single hidden layer. We have also excluded `bias` from the layers. Please note that the forward loop returns the hidden activation, besides the network output (prediction). we will need it in section 3." 
class LNNet(nn.Module):
    """
    A Linear Neural Net with one hidden layer: two bias-free
    `nn.Linear` layers applied one after the other.
    """

    def __init__(self, in_dim, hid_dim, out_dim):
        """
        Initialize LNNet parameters

        Args:
          in_dim: int
            Input dimension
          hid_dim: int
            Hidden dimension
          out_dim: int
            Output dimension

        Returns:
          Nothing
        """
        super().__init__()
        self.in_hid = nn.Linear(in_dim, hid_dim, bias=False)
        self.hid_out = nn.Linear(hid_dim, out_dim, bias=False)

    def forward(self, x):
        """
        Forward pass of LNNet

        Args:
          x: torch.Tensor
            Input tensor

        Returns:
          out: torch.Tensor
            Output/Prediction (first element of the returned tuple)
          hid: torch.Tensor
            Hidden layer activity (second element of the returned tuple)
        """
        hid = self.in_hid(x)  # Hidden activity
        out = self.hid_out(hid)  # Output (prediction)
        return out, hid
def train(model, inputs, targets, n_epochs, lr, illusory_i=0):
    """
    Full-batch SGD training on an MSE loss that also records, at every
    epoch, the singular values of the network, the representational
    similarity matrix of the hidden layer, and one chosen prediction.

    Args:
      model: torch nn.Module
        The neural network; calling it must return `(predictions, hiddens)`
      inputs: torch.Tensor
        Features (input) with shape `[batch_size, input_dim]`
      targets: torch.Tensor
        Targets (labels) with shape `[batch_size, output_dim]`
      n_epochs: int
        Number of training epochs (iterations)
      lr: float
        Learning rate
      illusory_i: int
        Row (input) index whose last-feature prediction is tracked

    Returns:
      losses: np.ndarray
        Record (evolution) of training loss
      modes: np.ndarray
        Record (evolution) of singular values (dynamic modes)
      rs_mats: np.ndarray
        Record (evolution) of representational similarity matrices
      illusions: np.ndarray
        Record of the network prediction for the last feature of
        input `illusory_i`
    """
    n_inputs = inputs.size(1)

    sgd = optim.SGD(model.parameters(), lr=lr)
    mse = nn.MSELoss()

    loss_log = np.zeros(n_epochs)
    # assumes net_svd yields exactly `n_inputs` singular values — TODO confirm
    sv_log = np.zeros((n_epochs, n_inputs))
    rsm_log = []
    illusion_log = np.zeros(n_epochs)

    for epoch in range(n_epochs):
        sgd.zero_grad()
        predictions, hiddens = model(inputs)
        loss = mse(predictions, targets)
        loss.backward()
        sgd.step()

        # Singular values are taken from the weights after this epoch's
        # update; the loss, hidden activity and prediction logged below
        # come from the forward pass made before the update.
        _, singular_values, _ = net_svd(model, n_inputs)
        similarity = net_rsm(hiddens.detach())
        tracked_prediction = predictions.detach()[illusory_i, -1]

        loss_log[epoch] = loss.item()
        sv_log[epoch] = singular_values.detach().numpy()
        rsm_log.append(similarity.numpy())
        illusion_log[epoch] = tracked_prediction.numpy()

    return loss_log, sv_log, np.array(rsm_log), illusion_log
def ex_initializer_(model, gamma=1e-12):
    """
    In-place Re-initialization of weights (exercise scaffold).

    As given, the function raises NotImplementedError on the first
    parameter; the `raise` line must be removed and `sigma` filled in
    before the normal re-initialization below can run.

    Args:
      model: torch.nn.Module
        PyTorch neural net model; every parameter is unpacked as a
        2D `(n_out, n_in)` weight
      gamma: float
        Initialization scale

    Returns:
      Nothing
    """
    for weight in model.parameters():
        n_out, n_in = weight.shape
        #################################################
        ## Define the standard deviation (sigma) for the normal distribution
        # as given in the equation above
        # Complete the function and remove or comment the line below
        raise NotImplementedError("Function `ex_initializer_`")
        #################################################
        sigma = ...
        nn.init.normal_(weight, mean=0.0, std=sigma)
# @title Video 1: Intro to Representation Learning
from ipywidgets import widgets
from IPython.display import YouTubeVideo
from IPython.display import IFrame
from IPython.display import display


class PlayVideo(IFrame):
    """Inline frame that embeds a video hosted on Bilibili or OSF."""

    def __init__(self, id, source, page=1, width=400, height=300, **kwargs):
        """
        Builds the embed URL for the requested host and sets up the IFrame

        Args:
          id: str
            Video identifier on the hosting platform
          source: str
            Hosting platform, either 'Bilibili' or 'Osf'
          page: int
            Page index inserted into the Bilibili player URL
          width: int
            Frame width
          height: int
            Frame height
          **kwargs:
            Extra arguments forwarded to `IFrame`

        Raises:
          ValueError: if `source` is neither 'Bilibili' nor 'Osf'
        """
        self.id = id
        if source == 'Bilibili':
            src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'
        elif source == 'Osf':
            src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'
        else:
            # Previously `src` stayed unbound here and the call below failed
            # with an UnboundLocalError that did not name the bad source.
            raise ValueError(
                f"Unsupported video source {source!r}; expected 'Bilibili' or 'Osf'")
        super().__init__(src, width, height, **kwargs)


def display_videos(video_ids, W=400, H=300, fs=1):
    """
    Renders every video into its own output widget

    Args:
      video_ids: list of tuples
        Each entry starts with the hosting platform ('Youtube', 'Bilibili'
        or 'Osf') followed by the video identifier
      W: int
        Player width
      H: int
        Player height
      fs: int
        Value forwarded to the players as their `fs` option

    Returns:
      tab_contents: list
        One `widgets.Output` per entry of `video_ids`, in the same order
    """
    tab_contents = []
    for entry in video_ids:
        # Index rather than unpack so that longer tuples keep working
        source, video_id = entry[0], entry[1]
        out = widgets.Output()
        with out:
            if source == 'Youtube':
                video = YouTubeVideo(id=video_id, width=W,
                                     height=H, fs=fs, rel=0)
                print(f'Video available at https://youtube.com/watch?v={video.id}')
            else:
                video = PlayVideo(id=video_id, source=source, width=W,
                                  height=H, fs=fs, autoplay=False)
                if source == 'Bilibili':
                    print(f'Video available at https://www.bilibili.com/video/{video.id}')
                elif source == 'Osf':
                    print(f'Video available at https://osf.io/{video.id}')
            display(video)
        tab_contents.append(out)
    return tab_contents


video_ids = [('Youtube', 'DqMSU4Bikt0'), ('Bilibili', 'BV1iM4y1T7eJ')]
tab_contents = display_videos(video_ids, W=730, H=410)
tabs = widgets.Tab()
tabs.children = tab_contents
# Label every tab with the name of its hosting platform
for i in range(len(tab_contents)):
    tabs.set_title(i, video_ids[i][0])
display(tabs)
[ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_Intro_to_Representation_Learning_Video\")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "So far, depth just seems to slow down the learning. And we know that a single nonlinear hidden layer (given a sufficient number of neurons and infinite training samples) has the potential to approximate any function. So it seems fair to ask: **What is depth good for**?\n", "\n", "One reason can be that shallow nonlinear neural networks hardly meet their true potential in practice. In contrast, deep neural nets are often surprisingly powerful in learning complex functions without sacrificing generalization. A core intuition behind deep learning is that deep nets derive their power through learning internal representations. How does this work? To address representation learning, we have to go beyond the 1D chain.\n", "\n", "For this and the next couple of exercises, we use synthetically generated, hierarchically structured data produced by a *branching diffusion process* (see [this reference](https://www.pnas.org/content/pnas/suppl/2019/05/16/1820226116.DCSupplemental/pnas.1820226116.sapp.pdf) for more details).\n", "\n", "
\"Simple
\n", "\n", "
hierarchically structured data (a tree)
# @markdown #### Run to generate and visualize training samples from tree

# Sample the label and feature arrays of the hierarchically structured dataset
tree_labels, tree_features = generate_hsd()

# Convert (cast) data from np.ndarray to torch.Tensor
# These float tensors are the inputs / targets handed to `train` by the
# training cells further down.
label_tensor = torch.tensor(tree_labels).float()
feature_tensor = torch.tensor(tree_features).float()

# Display names of the eight items
# NOTE(review): `plot_tree_data()` is called without arguments, so
# `item_names` is not passed to it here - presumably the helper falls back
# to equivalent defaults; confirm against its definition.
item_names = ['Goldfish', 'Tuna', 'Robin', 'Canary',
              'Rose', 'Daisy', 'Pine', 'Oak']
plot_tree_data()

# Dimensions
# (second axis of the label / feature arrays, then first axis of the features)
print("---------------------------------------------------------------")
print("Input Dimension: {}".format(tree_labels.shape[1]))
print("Output Dimension: {}".format(tree_features.shape[1]))
print("Number of samples: {}".format(tree_features.shape[0]))
\"neural
\n", "\n", "
The neural network used for this tutorial
" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Interactive Demo 1: Training the deep LNN\n", "\n", "Training a neural net on our data is straightforward. But before executing the next cell, remember the training loss curve from the previous tutorial." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " #### Make sure you execute this cell to train the network and plot\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @markdown #### Make sure you execute this cell to train the network and plot\n", "\n", "lr = 100.0 # Learning rate\n", "gamma = 1e-12 # Initialization scale\n", "n_epochs = 250 # Number of epochs\n", "dim_input = 8 # Input dimension = `label_tensor.size(1)`\n", "dim_hidden = 30 # Hidden neurons\n", "dim_output = 10000 # Output dimension = `feature_tensor.size(1)`\n", "\n", "# Model instantiation\n", "dlnn_model = LNNet(dim_input, dim_hidden, dim_output)\n", "\n", "# Weights re-initialization\n", "initializer_(dlnn_model, gamma)\n", "\n", "# Training\n", "losses, *_ = train(dlnn_model,\n", " label_tensor,\n", " feature_tensor,\n", " n_epochs=n_epochs,\n", " lr=lr)\n", "\n", "# Plotting\n", "plot_loss(losses)" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "**Think!**\n", "\n", "Why haven't we seen these \"bumps\" in training before? And should we look for them in the future? What do these bumps mean?\n", "\n", "Recall from the previous tutorial that we are always interested in the learning rate ($\\eta$) and initialization scale ($\\gamma$) that give us the fastest yet stable (reliable) convergence. Try finding the optimal $\\eta$ and $\\gamma$ using the following widgets. More specifically, try a large $\\gamma$ and see whether you can recover the bumps by tuning $\\eta$." 
# @markdown #### Make sure you execute this cell to enable the widget!

def loss_lr_init(lr, gamma, n_epochs=250):
    """
    Trains a freshly initialized deep LNN and plots the loss evolution

    Args:
      lr: float
        Learning rate
      gamma: float
        Initialization scale
      n_epochs: int
        Number of training epochs (defaults to 250, the value that used to
        be hard-coded in the body)

    Returns:
      Nothing
    """
    dim_input = 8  # Input dimension = `label_tensor.size(1)`
    dim_hidden = 30  # Hidden neurons
    dim_output = 10000  # Output dimension = `feature_tensor.size(1)`

    # Model instantiation
    dlnn_model = LNNet(dim_input, dim_hidden, dim_output)

    # Weights re-initialization
    initializer_(dlnn_model, gamma)

    # Only the first returned value (the losses) is needed for this plot
    losses, *_ = train(dlnn_model,
                       label_tensor,
                       feature_tensor,
                       n_epochs=n_epochs,
                       lr=lr)

    plot_loss(losses)


# The fixed value used to be passed as `epochs`, a keyword that
# `loss_lr_init` did not accept, so it never reached the training call.
# It is now bound to the `n_epochs` parameter.
_ = interact(loss_lr_init,
             lr=FloatSlider(min=1.0, max=200.0,
                            step=1.0, value=100.0,
                            continuous_update=False,
                            readout_format='.1f',
                            description='eta'),
             n_epochs=fixed(250),
             gamma=FloatLogSlider(min=-15, max=1,
                                  step=1, value=1e-12, base=10,
                                  continuous_update=False,
                                  description='gamma')
             )
(SVD)\n", "\n", "*Time estimate: ~20 mins*" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Video 2: SVD\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "remove-input" ] }, "outputs": [], "source": [ "# @title Video 2: SVD\n", "from ipywidgets import widgets\n", "from IPython.display import YouTubeVideo\n", "from IPython.display import IFrame\n", "from IPython.display import display\n", "\n", "\n", "class PlayVideo(IFrame):\n", " def __init__(self, id, source, page=1, width=400, height=300, **kwargs):\n", " self.id = id\n", " if source == 'Bilibili':\n", " src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'\n", " elif source == 'Osf':\n", " src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'\n", " super(PlayVideo, self).__init__(src, width, height, **kwargs)\n", "\n", "\n", "def display_videos(video_ids, W=400, H=300, fs=1):\n", " tab_contents = []\n", " for i, video_id in enumerate(video_ids):\n", " out = widgets.Output()\n", " with out:\n", " if video_ids[i][0] == 'Youtube':\n", " video = YouTubeVideo(id=video_ids[i][1], width=W,\n", " height=H, fs=fs, rel=0)\n", " print(f'Video available at https://youtube.com/watch?v={video.id}')\n", " else:\n", " video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,\n", " height=H, fs=fs, autoplay=False)\n", " if video_ids[i][0] == 'Bilibili':\n", " print(f'Video available at https://www.bilibili.com/video/{video.id}')\n", " elif video_ids[i][0] == 'Osf':\n", " print(f'Video available at https://osf.io/{video.id}')\n", " display(video)\n", " tab_contents.append(out)\n", " return tab_contents\n", "\n", "\n", "video_ids = [('Youtube', '18oNWRziskM'), ('Bilibili', 'BV1bw411R7DJ')]\n", "tab_contents = display_videos(video_ids, W=730, H=410)\n", "tabs = widgets.Tab()\n", "tabs.children = tab_contents\n", "for i in range(len(tab_contents)):\n", " tabs.set_title(i, 
video_ids[i][0])\n", "display(tabs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_SVD_Video\")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "In this section, we intend to study the learning (training) dynamics we just saw. First, we should know that a linear neural network is performing sequential matrix multiplications, which can be simplified to:\n", "\n", "\\begin{align}\n", "\\mathbf{y} &= \\mathbf{W}_{L}~\\mathbf{W}_{L-1}~\\dots~\\mathbf{W}_{1} ~ \\mathbf{x} \\\\\n", " &= \\left(\\prod_{i=1}^{L}{\\mathbf{W}_{i}}\\right) ~ \\mathbf{x} \\\\\n", " &= \\mathbf{W}_{tot} ~ \\mathbf{x}\n", "\\end{align}\n", "\n", "where $L$ denotes the number of layers in our network.\n", "\n", "[Saxe et al. (2013)](https://arxiv.org/abs/1312.6120) showed that, to analyze and understand the nonlinear learning dynamics of a deep LNN, we can use [Singular Value Decomposition (SVD)](https://en.wikipedia.org/wiki/Singular_value_decomposition) to decompose $\\mathbf{W}_{tot}$ into orthogonal vectors, where the orthogonality of the vectors ensures their \"individuality (independence)\". This means we can break a deep wide LNN into multiple deep narrow LNNs, so that their activity is untangled from each other.\n", "\n", "
\n", "\n", "__A Quick intro to SVD__\n", "\n", "Any real-valued matrix $A$ (yes, ANY) can be decomposed (factorized) into 3 matrices:\n", "\n", "\\begin{equation}\n", "\\mathbf{A} = \\mathbf{U} \\mathbf{\\Sigma} \\mathbf{V}^{\\top}\n", "\\end{equation}\n", "\n", "where $U$ is an orthogonal matrix, $\\Sigma$ is a diagonal matrix, and $V$ is again an orthogonal matrix. The diagonal elements of $\\Sigma$ are called **singular values**.\n", "\n", "The main difference between SVD and EigenValue Decomposition (EVD) is that EVD requires $A$ to be square and does not guarantee the eigenvectors to be orthogonal.\n", "\n", "We strongly recommend the lecture [Singular Value Decomposition (the SVD)](https://www.youtube.com/watch?v=mBcLRGuAFUk) by the amazing [Gilbert Strang](http://www-math.mit.edu/~gs/), if you would like to learn more." ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Coding Exercise 2: SVD (Optional)\n", "\n", "The goal is to perform the SVD on $\\mathbf{W}_{tot}$ in every epoch, and record the singular values (modes) during the training.\n", "\n", "Complete the function `ex_net_svd` by first calculating $\\mathbf{W}_{tot} = \\prod_{i=1}^{L}{\\mathbf{W}_{i}}$ and then performing SVD on $\\mathbf{W}_{tot}$. Please use the PyTorch [`torch.svd`](https://pytorch.org/docs/stable/generated/torch.svd.html) instead of NumPy [`np.linalg.svd`](https://numpy.org/doc/stable/reference/generated/numpy.linalg.svd.html)." 
def ex_net_svd(model, in_dim):
    """
    Performs a Singular Value Decomposition on a given model weights
    (exercise template)

    The total weight matrix starts as an `in_dim` x `in_dim` identity and is
    meant to be multiplied by every tensor from `model.parameters()` before
    being decomposed. As long as the `raise` statement below is in place, the
    function stops at the first parameter it visits.

    Args:
      model: torch.nn.Module
        Neural network model
      in_dim: int
        The input dimension of the model

    Returns:
      U: torch.tensor
        Orthogonal matrix
      Σ: torch.tensor
        Singular values; when computed with `torch.svd`, as the exercise
        asks, this is the 1-D tensor holding the diagonal of Σ rather than
        a full diagonal matrix
      V: torch.tensor
        Orthogonal matrix

    Raises:
      NotImplementedError: until the exercise has been completed
    """
    # Identity start, so the running product initially leaves inputs unchanged
    W_tot = torch.eye(in_dim)
    for weight in model.parameters():
        #################################################
        ## Calculate the W_tot by multiplication of all weights
        # and then perform SVD on the W_tot using pytorch's `torch.svd`
        # Complete the function and remove or comment the line below
        raise NotImplementedError("Function `ex_net_svd`")
        #################################################
        W_tot = ...
    U, Σ, V = ...
    return U, Σ, V


## Uncomment and run
# test_net_svd_ex(SEED)
# @markdown #### Make sure you execute this cell to train the network and plot

# Hyperparameters (module-level, so they stay visible to later cells)
lr = 100.0 # Learning rate
gamma = 1e-12 # Initialization scale
n_epochs = 250 # Number of epochs
dim_input = 8 # Input dimension = `label_tensor.size(1)`
dim_hidden = 30 # Hidden neurons
dim_output = 10000 # Output dimension = `feature_tensor.size(1)`

# Model instantiation
dlnn_model = LNNet(dim_input, dim_hidden, dim_output)

# Weights re-initialization
initializer_(dlnn_model, gamma)

# Training
# `train` returns more values than are used here; only the losses and the
# singular values (modes) are kept, the rest is discarded through `*_`.
losses, modes, *_ = train(dlnn_model,
                          label_tensor,
                          feature_tensor,
                          n_epochs=n_epochs,
                          lr=lr)

# Plotting: loss curve together with the recorded singular values
plot_loss_sv_twin(losses, modes)
# @title Video 3: SVD - Discussion
from ipywidgets import widgets
from IPython.display import YouTubeVideo
from IPython.display import IFrame
from IPython.display import display


class PlayVideo(IFrame):
    """Inline frame that embeds a video hosted on Bilibili or OSF."""

    def __init__(self, id, source, page=1, width=400, height=300, **kwargs):
        """
        Builds the embed URL for the requested host and sets up the IFrame

        Args:
          id: str
            Video identifier on the hosting platform
          source: str
            Hosting platform, either 'Bilibili' or 'Osf'
          page: int
            Page index inserted into the Bilibili player URL
          width: int
            Frame width
          height: int
            Frame height
          **kwargs:
            Extra arguments forwarded to `IFrame`

        Raises:
          ValueError: if `source` is neither 'Bilibili' nor 'Osf'
        """
        self.id = id
        if source == 'Bilibili':
            src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'
        elif source == 'Osf':
            src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'
        else:
            # Previously `src` stayed unbound here and the call below failed
            # with an UnboundLocalError that did not name the bad source.
            raise ValueError(
                f"Unsupported video source {source!r}; expected 'Bilibili' or 'Osf'")
        super().__init__(src, width, height, **kwargs)


def display_videos(video_ids, W=400, H=300, fs=1):
    """
    Renders every video into its own output widget

    Args:
      video_ids: list of tuples
        Each entry starts with the hosting platform ('Youtube', 'Bilibili'
        or 'Osf') followed by the video identifier
      W: int
        Player width
      H: int
        Player height
      fs: int
        Value forwarded to the players as their `fs` option

    Returns:
      tab_contents: list
        One `widgets.Output` per entry of `video_ids`, in the same order
    """
    tab_contents = []
    for entry in video_ids:
        # Index rather than unpack so that longer tuples keep working
        source, video_id = entry[0], entry[1]
        out = widgets.Output()
        with out:
            if source == 'Youtube':
                video = YouTubeVideo(id=video_id, width=W,
                                     height=H, fs=fs, rel=0)
                print(f'Video available at https://youtube.com/watch?v={video.id}')
            else:
                video = PlayVideo(id=video_id, source=source, width=W,
                                  height=H, fs=fs, autoplay=False)
                if source == 'Bilibili':
                    print(f'Video available at https://www.bilibili.com/video/{video.id}')
                elif source == 'Osf':
                    print(f'Video available at https://osf.io/{video.id}')
            display(video)
        tab_contents.append(out)
    return tab_contents


video_ids = [('Youtube', 'JEbRPPG2kUI'), ('Bilibili', 'BV1t54y1J7Tb')]
tab_contents = display_videos(video_ids, W=730, H=410)
tabs = widgets.Tab()
tabs.children = tab_contents
# Label every tab with the name of its hosting platform
for i in range(len(tab_contents)):
    tabs.set_title(i, video_ids[i][0])
display(tabs)
YouTubeVideo(id=video_ids[i][1], width=W,\n", " height=H, fs=fs, rel=0)\n", " print(f'Video available at https://youtube.com/watch?v={video.id}')\n", " else:\n", " video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,\n", " height=H, fs=fs, autoplay=False)\n", " if video_ids[i][0] == 'Bilibili':\n", " print(f'Video available at https://www.bilibili.com/video/{video.id}')\n", " elif video_ids[i][0] == 'Osf':\n", " print(f'Video available at https://osf.io/{video.id}')\n", " display(video)\n", " tab_contents.append(out)\n", " return tab_contents\n", "\n", "\n", "video_ids = [('Youtube', 'YOs1yffysX8'), ('Bilibili', 'BV19f4y157zD')]\n", "tab_contents = display_videos(video_ids, W=730, H=410)\n", "tabs = widgets.Tab()\n", "tabs.children = tab_contents\n", "for i in range(len(tab_contents)):\n", " tabs.set_title(i, video_ids[i][0])\n", "display(tabs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_RSA_Video\")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "The previous section ended with an interesting remark. SVD helped to break our deep \"wide\" linear neural net into 8 deep \"narrow\" linear neural nets.\n", "\n", "The first narrow net (highest singular value) converges fastest, while the last four narrow nets, converge almost simultaneously and have the smallest singular values. Can it be that the narrow net with larger mode is learning the difference between \"living things\" and \"objects\", while another narrow net with smaller mode is learning the difference between Fish and Birds? 
How could we check this hypothesis?\n", "\n", "Representational Similarity Analysis (RSA) is an approach that could help us understand the internal representation of our network. The main idea is that the activity of hidden units (neurons) in the network must be similar when the network is presented with similar input. For our dataset (hierarchically structured data), we expect the activity of neurons in the hidden layer to be more similar for Tuna and Canary, and less similar for Tuna and Oak.\n", "\n", "As a similarity measure, we can use the good old dot (scalar) product, which coincides with the cosine similarity when the vectors have unit norm. For calculating the dot product between multiple vectors (which would be our case), we can simply use matrix multiplication. Therefore the Representational Similarity Matrix for multiple-input (batch) activity could be calculated as follows:\n", "\n", "\\begin{equation}\n", "RSM = \\mathbf{H} \\mathbf{H}^{\\top}\n", "\\end{equation}\n", "\n", "where $\\mathbf{H} = \\mathbf{X} \\mathbf{W_1}$ is the activity of hidden neurons for a given batch $\\mathbf{X}$." ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Coding Exercise 3: RSA (Optional)\n", "\n", "The task is simple. We need to measure the similarity between the hidden layer activities ($\\mathbf{h} = \\mathbf{x} ~\\mathbf{W_1}$) for every input $\\mathbf{x}$.\n", "\n", "If we perform RSA in every iteration, we could also see the evolution of representation learning." 
def ex_net_rsm(h):
    """
    Calculates the Representational Similarity Matrix (exercise template)

    The matrix is meant to hold the dot product between the hidden
    activities of every pair of inputs, i.e. `h` multiplied by its own
    transpose. As long as the `raise` statement below is in place, calling
    the function always fails.

    Args:
      h: torch.Tensor
        Activity of a hidden layer
        (assumed to hold one row per input item - TODO confirm with caller)

    Returns:
      rsm: torch.Tensor
        Representational Similarity Matrix

    Raises:
      NotImplementedError: until the exercise has been completed
    """
    #################################################
    ## Calculate the Representational Similarity Matrix
    # Complete the function and remove or comment the line below
    raise NotImplementedError("Function `ex_net_rsm`")
    #################################################
    rsm = ...
    return rsm


## Uncomment and run
# test_net_rsm_ex(SEED)
#@markdown #### Make sure you execute this cell to enable widgets

def loss_svd_rsm_lr_gamma(lr, gamma, i_ep):
    """
    Trains a fresh deep LNN and plots its loss, modes and RSM

    A new network is built, re-initialized and trained on every call; the
    plot then shows the losses and singular values together with the RSM
    selected by `i_ep`.

    Args:
      lr: float
        Learning rate
      gamma: float
        Initialization scale
      i_ep: int
        Which epoch to show

    Returns:
      Nothing
    """
    # (input, hidden, output) sizes; input = `label_tensor.size(1)`,
    # output = `feature_tensor.size(1)`
    layer_sizes = (8, 30, 10000)
    epoch_count = 250

    # Build the network, then overwrite its weights with the chosen scale
    net = LNNet(*layer_sizes)
    initializer_(net, gamma)

    # Train; the fourth returned value is not needed for this plot
    loss_curve, sv_modes, rsm_stack, _ = train(
        net, label_tensor, feature_tensor, n_epochs=epoch_count, lr=lr)

    plot_loss_sv_rsm(loss_curve, sv_modes, rsm_stack, i_ep)


# Controls: learning rate, initialization scale (log scale), epoch to display
lr_slider = FloatSlider(description='eta',
                        value=100.0, min=20.0, max=200.0, step=1.0,
                        readout_format='.1f',
                        continuous_update=False)

gamma_slider = FloatLogSlider(description='gamma',
                              value=1e-12, base=10, min=-15, max=1, step=1,
                              continuous_update=False)

i_ep_slider = IntSlider(description='Epoch',
                        value=61, min=10, max=241, step=1,
                        continuous_update=False,
                        layout=Layout(width='630px'))

# Stack the sliders vertically and bind each one to the matching argument
widgets_ui = VBox([lr_slider, gamma_slider, i_ep_slider])

widgets_out = interactive_output(
    loss_svd_rsm_lr_gamma,
    dict(lr=lr_slider, gamma=gamma_slider, i_ep=i_ep_slider))

display(widgets_ui, widgets_out)
"cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "Let's take a moment to analyze this more. A deep neural net is learning the representations, rather than a naive mapping (look-up table). This is thought to be the reason for deep neural nets' superior generalization and transfer learning ability. Unsurprisingly, neural nets with no hidden layer are incapable of representation learning, even with extremely small initialization." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_RSA_Exercise\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Video 5: RSA - Discussion\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "remove-input" ] }, "outputs": [], "source": [ "# @title Video 5: RSA - Discussion\n", "from ipywidgets import widgets\n", "from IPython.display import YouTubeVideo\n", "from IPython.display import IFrame\n", "from IPython.display import display\n", "\n", "\n", "class PlayVideo(IFrame):\n", " def __init__(self, id, source, page=1, width=400, height=300, **kwargs):\n", " self.id = id\n", " if source == 'Bilibili':\n", " src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'\n", " elif source == 'Osf':\n", " src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'\n", " super(PlayVideo, self).__init__(src, width, height, **kwargs)\n", "\n", "\n", "def display_videos(video_ids, W=400, H=300, fs=1):\n", " tab_contents = []\n", " for i, video_id in enumerate(video_ids):\n", " out = widgets.Output()\n", " with out:\n", " if video_ids[i][0] == 'Youtube':\n", " video = YouTubeVideo(id=video_ids[i][1], width=W,\n", " height=H, fs=fs, rel=0)\n", 
# @title Video 6: Illusory Correlations
from ipywidgets import widgets
from IPython.display import YouTubeVideo
from IPython.display import IFrame
from IPython.display import display


class PlayVideo(IFrame):
    """Inline frame that embeds a video hosted on Bilibili or OSF."""

    def __init__(self, id, source, page=1, width=400, height=300, **kwargs):
        """
        Builds the embed URL for the requested host and sets up the IFrame

        Args:
          id: str
            Video identifier on the hosting platform
          source: str
            Hosting platform, either 'Bilibili' or 'Osf'
          page: int
            Page index inserted into the Bilibili player URL
          width: int
            Frame width
          height: int
            Frame height
          **kwargs:
            Extra arguments forwarded to `IFrame`

        Raises:
          ValueError: if `source` is neither 'Bilibili' nor 'Osf'
        """
        self.id = id
        if source == 'Bilibili':
            src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'
        elif source == 'Osf':
            src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'
        else:
            # Previously `src` stayed unbound here and the call below failed
            # with an UnboundLocalError that did not name the bad source.
            raise ValueError(
                f"Unsupported video source {source!r}; expected 'Bilibili' or 'Osf'")
        super().__init__(src, width, height, **kwargs)


def display_videos(video_ids, W=400, H=300, fs=1):
    """
    Renders every video into its own output widget

    Args:
      video_ids: list of tuples
        Each entry starts with the hosting platform ('Youtube', 'Bilibili'
        or 'Osf') followed by the video identifier
      W: int
        Player width
      H: int
        Player height
      fs: int
        Value forwarded to the players as their `fs` option

    Returns:
      tab_contents: list
        One `widgets.Output` per entry of `video_ids`, in the same order
    """
    tab_contents = []
    for entry in video_ids:
        # Index rather than unpack so that longer tuples keep working
        source, video_id = entry[0], entry[1]
        out = widgets.Output()
        with out:
            if source == 'Youtube':
                video = YouTubeVideo(id=video_id, width=W,
                                     height=H, fs=fs, rel=0)
                print(f'Video available at https://youtube.com/watch?v={video.id}')
            else:
                video = PlayVideo(id=video_id, source=source, width=W,
                                  height=H, fs=fs, autoplay=False)
                if source == 'Bilibili':
                    print(f'Video available at https://www.bilibili.com/video/{video.id}')
                elif source == 'Osf':
                    print(f'Video available at https://osf.io/{video.id}')
            display(video)
        tab_contents.append(out)
    return tab_contents


video_ids = [('Youtube', 'RxsAvyIoqEo'), ('Bilibili', 'BV1vv411E7Sq')]
tab_contents = display_videos(video_ids, W=730, H=410)
tabs = widgets.Tab()
tabs.children = tab_contents
# Label every tab with the name of its hosting platform
for i in range(len(tab_contents)):
    tabs.set_title(i, video_ids[i][0])
display(tabs)
For very deep complex neural nets, such plateaus can take hours of training, and we are often tempted to stop the training, because we believe it is \"as good as it gets\"! Another side effect of \"premature interruption\" of training is the network finding (learning) illusory correlations.\n", "\n", "To better understand this, let's do the next demonstration and exercise." ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Demonstration: Illusory Correlations\n", "\n", "Our original dataset has 4 animals: Canary, Robin, Goldfish, and Tuna. These animals all have bones. Therefore if we include a \"has bone\" feature, the network would learn it at the second level (i.e. second bump, second mode convergence), when it learns the animal-plants distinction.\n", "\n", "What if the dataset has Shark instead of Goldfish? Sharks don't have bones (their skeletons are made of cartilage, which is much lighter than true bone and more flexible). Then we will have a feature which is *True* (i.e. +1) for Tuna, Robin, and Canary, but *False* (i.e. -1) for all the plants and the shark! Let's see what the network does.\n", "\n", "First, we add the new feature to the targets. We then start training our LNN and in every epoch, record the network prediction for \"sharks having bones\".\n", "\n", "
\"Simple
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "# Sampling new data from the tree\n", "tree_labels, tree_features = generate_hsd()\n", "\n", "# Replacing Goldfish with Shark\n", "item_names = ['Shark', 'Tuna', 'Robin', 'Canary',\n", " 'Rose', 'Daisy', 'Pine', 'Oak']\n", "\n", "# Index of label to record\n", "illusion_idx = 0 # Shark is the first element\n", "\n", "# The new feature (has bones) vector\n", "new_feature = [-1, 1, 1, 1, -1, -1, -1, -1]\n", "its_label = 'has_bones'\n", "\n", "# Adding feature has_bones to the feature array\n", "tree_features = add_feature(tree_features, new_feature)\n", "\n", "# Plotting\n", "plot_tree_data(item_names, tree_features, its_label)" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "You can see the new feature shown in the last column of the plot above.\n", "\n", "Now we can train the network on the new data, and record the network prediction (output) for Shark (indexed 0) label and \"has bone\" feature (last feature, indexed -1), during the training.\n", "\n", "Here is the snippet from the training loop that keeps track of network prediction for `illusory_i`th label and last (`-1`) feature:\n", "\n", "```python\n", "pred_ij = predictions.detach()[illusory_i, -1]\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " #### Make sure you execute this cell to train the network and plot\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "#@markdown #### Make sure you execute this cell to train the network and plot\n", "\n", "# Convert (cast) data from np.ndarray to torch.Tensor\n", "label_tensor = torch.tensor(tree_labels).float()\n", "feature_tensor = torch.tensor(tree_features).float()\n", "\n", "lr = 100.0 # Learning rate\n", "gamma = 1e-12 # Initialization scale\n", "n_epochs = 250 # Number of epochs\n", 
"dim_input = 8 # Input dimension = `label_tensor.size(1)`\n", "dim_hidden = 30 # Hidden neurons\n", "dim_output = feature_tensor.size(1)\n", "\n", "# Model instantiation\n", "dlnn_model = LNNet(dim_input, dim_hidden, dim_output)\n", "\n", "# Weights re-initialization\n", "initializer_(dlnn_model, gamma)\n", "\n", "# Training\n", "_, modes, _, ill_predictions = train(dlnn_model,\n", " label_tensor,\n", " feature_tensor,\n", " n_epochs=n_epochs,\n", " lr=lr,\n", " illusory_i=illusion_idx)\n", "\n", "# Label for the plot\n", "ill_label = f\"Prediction for {item_names[illusion_idx]} {its_label}\"\n", "\n", "# Plotting\n", "plot_ills_sv_twin(ill_predictions, modes, ill_label)" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "It seems that the network starts by learning an \"illusory correlation\" that sharks have bones, and in later epochs, as it learns deeper representations, it can see (learn) beyond the illusory correlation. This is important to remember that we never presented the network with any data saying that sharks have bones." ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Exercise 4: Illusory Correlations\n", "\n", "This exercise is just for you to explore the idea of illusory correlations. Think of medical, natural, or possibly social illusory correlations which can test the learning power of deep linear neural nets.\n", "\n", "**important notes**: the generated data is independent of tree labels, therefore the names are just for convenience.\n", "\n", "Here is our example for **Non-human Living things do not speak**. The lines marked by `{edit}` are for you to change in your example." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "# Sampling new data from the tree\n", "tree_labels, tree_features = generate_hsd()\n", "\n", "# {edit} Replacing Canary with Parrot\n", "item_names = ['Goldfish', 'Tuna', 'Robin', 'Parrot',\n", " 'Rose', 'Daisy', 'Pine', 'Oak']\n", "\n", "# {edit} Index of label to record\n", "illusion_idx = 3 # Parrot is the fourth element\n", "\n", "# {edit} The new feature (cannot speak) vector\n", "new_feature = [1, 1, 1, -1, 1, 1, 1, 1]\n", "its_label = 'cannot_speak'\n", "\n", "# Adding the new feature to the feature array\n", "tree_features = add_feature(tree_features, new_feature)\n", "\n", "# Plotting\n", "plot_tree_data(item_names, tree_features, its_label)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " #### Make sure you execute this cell to train the network and plot\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @markdown #### Make sure you execute this cell to train the network and plot\n", "\n", "# Convert (cast) data from np.ndarray to torch.Tensor\n", "label_tensor = torch.tensor(tree_labels).float()\n", "feature_tensor = torch.tensor(tree_features).float()\n", "\n", "lr = 100.0 # Learning rate\n", "gamma = 1e-12 # Initialization scale\n", "n_epochs = 250 # Number of epochs\n", "dim_input = 8 # Input dimension = `label_tensor.size(1)`\n", "dim_hidden = 30 # Hidden neurons\n", "dim_output = feature_tensor.size(1)\n", "\n", "# Model instantiation\n", "dlnn_model = LNNet(dim_input, dim_hidden, dim_output)\n", "\n", "# Weights re-initialization\n", "initializer_(dlnn_model, gamma)\n", "\n", "# Training\n", "_, modes, _, ill_predictions = train(dlnn_model,\n", " label_tensor,\n", " feature_tensor,\n", " n_epochs=n_epochs,\n", " lr=lr,\n", " illusory_i=illusion_idx)\n", "\n", "# Label for the plot\n", "ill_label = 
f\"Prediction for {item_names[illusion_idx]} {its_label}\"\n", "\n", "# Plotting\n", "plot_ills_sv_twin(ill_predictions, modes, ill_label)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_Illusory_Correlations_Exercise\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Video 7: Illusory Correlations - Discussion\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "remove-input" ] }, "outputs": [], "source": [ "# @title Video 7: Illusory Correlations - Discussion\n", "from ipywidgets import widgets\n", "from IPython.display import YouTubeVideo\n", "from IPython.display import IFrame\n", "from IPython.display import display\n", "\n", "\n", "class PlayVideo(IFrame):\n", " def __init__(self, id, source, page=1, width=400, height=300, **kwargs):\n", " self.id = id\n", " if source == 'Bilibili':\n", " src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'\n", " elif source == 'Osf':\n", " src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'\n", " super(PlayVideo, self).__init__(src, width, height, **kwargs)\n", "\n", "\n", "def display_videos(video_ids, W=400, H=300, fs=1):\n", " tab_contents = []\n", " for i, video_id in enumerate(video_ids):\n", " out = widgets.Output()\n", " with out:\n", " if video_ids[i][0] == 'Youtube':\n", " video = YouTubeVideo(id=video_ids[i][1], width=W,\n", " height=H, fs=fs, rel=0)\n", " print(f'Video available at https://youtube.com/watch?v={video.id}')\n", " else:\n", " video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,\n", " height=H, fs=fs, autoplay=False)\n", " if video_ids[i][0] == 'Bilibili':\n", " 
print(f'Video available at https://www.bilibili.com/video/{video.id}')\n", " elif video_ids[i][0] == 'Osf':\n", " print(f'Video available at https://osf.io/{video.id}')\n", " display(video)\n", " tab_contents.append(out)\n", " return tab_contents\n", "\n", "\n", "video_ids = [('Youtube', '6VLHKQjQJmI'), ('Bilibili', 'BV1vv411E7rg')]\n", "tab_contents = display_videos(video_ids, W=730, H=410)\n", "tabs = widgets.Tab()\n", "tabs.children = tab_contents\n", "for i in range(len(tab_contents)):\n", " tabs.set_title(i, video_ids[i][0])\n", "display(tabs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_Illusory_Correlations_Discussion_Video\")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "---\n", "# Summary\n", "\n", "The second day of the course has ended. So, in the third tutorial of the linear deep learning day we have learned more advanced topics. In the beginning we implemented a deep linear neural network and then we studied its learning dynamics using the linear algebra tool called singular value decomposition. Then, we learned about the representational similarity analysis and the illusory correlation." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Video 8: Outro\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "remove-input" ] }, "outputs": [], "source": [ "# @title Video 8: Outro\n", "from ipywidgets import widgets\n", "from IPython.display import YouTubeVideo\n", "from IPython.display import IFrame\n", "from IPython.display import display\n", "\n", "\n", "class PlayVideo(IFrame):\n", " def __init__(self, id, source, page=1, width=400, height=300, **kwargs):\n", " self.id = id\n", " if source == 'Bilibili':\n", " src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'\n", " elif source == 'Osf':\n", " src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'\n", " super(PlayVideo, self).__init__(src, width, height, **kwargs)\n", "\n", "\n", "def display_videos(video_ids, W=400, H=300, fs=1):\n", " tab_contents = []\n", " for i, video_id in enumerate(video_ids):\n", " out = widgets.Output()\n", " with out:\n", " if video_ids[i][0] == 'Youtube':\n", " video = YouTubeVideo(id=video_ids[i][1], width=W,\n", " height=H, fs=fs, rel=0)\n", " print(f'Video available at https://youtube.com/watch?v={video.id}')\n", " else:\n", " video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,\n", " height=H, fs=fs, autoplay=False)\n", " if video_ids[i][0] == 'Bilibili':\n", " print(f'Video available at https://www.bilibili.com/video/{video.id}')\n", " elif video_ids[i][0] == 'Osf':\n", " print(f'Video available at https://osf.io/{video.id}')\n", " display(video)\n", " tab_contents.append(out)\n", " return tab_contents\n", "\n", "\n", "video_ids = [('Youtube', 'N2szOIsKyXE'), ('Bilibili', 'BV1AL411n7ns')]\n", "tab_contents = display_videos(video_ids, W=730, H=410)\n", "tabs = widgets.Tab()\n", "tabs.children = tab_contents\n", "for i in range(len(tab_contents)):\n", " tabs.set_title(i, video_ids[i][0])\n", "display(tabs)" ] }, { 
"cell_type": "markdown", "metadata": {}, "source": [ "## Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_Outro_Video\")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "---\n", "# Daily survey\n", "\n", "Don't forget to complete your reflections and content check in the daily survey! Please be patient after logging in as there is\n", "a small delay before you will be redirected to the survey.\n", "\n", "\"button" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "---\n", "# Bonus\n", "\n", "*Time estimate: ~20-30 mins*" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Video 9: Linear Regression\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "remove-input" ] }, "outputs": [], "source": [ "# @title Video 9: Linear Regression\n", "from ipywidgets import widgets\n", "from IPython.display import YouTubeVideo\n", "from IPython.display import IFrame\n", "from IPython.display import display\n", "\n", "\n", "class PlayVideo(IFrame):\n", " def __init__(self, id, source, page=1, width=400, height=300, **kwargs):\n", " self.id = id\n", " if source == 'Bilibili':\n", " src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'\n", " elif source == 'Osf':\n", " src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'\n", " super(PlayVideo, self).__init__(src, width, height, **kwargs)\n", "\n", "\n", "def display_videos(video_ids, W=400, H=300, fs=1):\n", " tab_contents = []\n", " for i, video_id in enumerate(video_ids):\n", " out = widgets.Output()\n", " with out:\n", " if video_ids[i][0] == 'Youtube':\n", " video = YouTubeVideo(id=video_ids[i][1], width=W,\n", " height=H, fs=fs, 
rel=0)\n", " print(f'Video available at https://youtube.com/watch?v={video.id}')\n", " else:\n", " video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,\n", " height=H, fs=fs, autoplay=False)\n", " if video_ids[i][0] == 'Bilibili':\n", " print(f'Video available at https://www.bilibili.com/video/{video.id}')\n", " elif video_ids[i][0] == 'Osf':\n", " print(f'Video available at https://osf.io/{video.id}')\n", " display(video)\n", " tab_contents.append(out)\n", " return tab_contents\n", "\n", "\n", "video_ids = [('Youtube', 'uULOAbhYaaE'), ('Bilibili', 'BV1Pf4y1L71L')]\n", "tab_contents = display_videos(video_ids, W=730, H=410)\n", "tabs = widgets.Tab()\n", "tabs.children = tab_contents\n", "for i in range(len(tab_contents)):\n", " tabs.set_title(i, video_ids[i][0])\n", "display(tabs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_Linear_Regression_Bonus_Video\")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Section 5.1: Linear Regression\n", "\n", "Generally, *regression* refers to a set of methods for modeling the mapping (relationship) between one (or more) independent variable(s) (i.e., features) and one (or more) dependent variable(s) (i.e., labels). For example, we might want to examine the relative impacts of calendar date, GPS coordinates, and time of the day (the independent variables) on air temperature (the dependent variable). Regression can also be used for predictive analysis. Thus the independent variables are also called predictors. When the model contains more than one predictor, then the method is called *multiple regression*, and if it contains more than one dependent variable, it is called *multivariate regression*. 
Regression problems pop up whenever we want to predict a numerical (usually continuous) value.\n", "\n", "The independent variables are collected in vector $\\mathbf{x} \\in \\mathbb{R}^M$, where $M$ denotes the number of independent variables, while the dependent variables are collected in vector $\\mathbf{y} \\in \\mathbb{R}^N$, where $N$ denotes the number of dependent variables. And the mapping between them is represented by the weight matrix $\\mathbf{W} \\in \\mathbb{R}^{N \\times M}$ and a bias vector $\\mathbf{b} \\in \\mathbb{R}^{N}$ (generalizing to affine mappings).\n", "\n", "The multivariate regression model can be written as:\n", "\n", "\\begin{equation}\n", "\\mathbf{y} = \\mathbf{W} ~ \\mathbf{x} + \\mathbf{b}\n", "\\end{equation}\n", "\n", "or it can be written in matrix format as:\n", "\n", "\\begin{equation}\n", "\\begin{bmatrix} y_{1} \\\\ y_{2} \\\\ \\vdots \\\\ y_{N} \\\\ \\end{bmatrix} = \\begin{bmatrix} w_{1,1} & w_{1,2} & \\dots & w_{1,M} \\\\ w_{2,1} & w_{2,2} & \\dots & w_{2,M} \\\\ \\vdots & \\ddots & \\ddots & \\vdots \\\\ w_{N,1} & w_{N,2} & \\dots & w_{N,M} \\end{bmatrix} \\begin{bmatrix} x_{1} \\\\ x_{2} \\\\ \\vdots \\\\ x_{M} \\\\ \\end{bmatrix} + \\begin{bmatrix} b_{1} \\\\ b_{2} \\\\ \\vdots \\\\b_{N} \\\\ \\end{bmatrix}\n", "\\end{equation}" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Section 5.2: Vectorized regression\n", "\n", "Linear regression can be simply extended to multi-samples ($D$) input-output mapping, which we can collect in a matrix $\\mathbf{X} \\in \\mathbb{R}^{M \\times D}$, sometimes called the design matrix. The sample dimension also shows up in the output matrix $\\mathbf{Y} \\in \\mathbb{R}^{N \\times D}$. 
Thus, linear regression takes the following form:\n", "\n", "\\begin{equation}\n", "\\mathbf{Y} = \\mathbf{W} ~ \\mathbf{X} + \\mathbf{b}\n", "\\end{equation}\n", "\n", "where matrix $\\mathbf{W} \\in \\mathbb{R}^{N \\times M}$ and the vector $\\mathbf{b} \\in \\mathbb{R}^{N}$ (broadcast over the sample dimension) are the desired parameters to find." ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Section 5.3: Analytical Linear Regression" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "Linear regression is a relatively simple optimization problem. Unlike most other models that we will see in this course, linear regression for mean squared error loss can be solved analytically.\n", "\n", "For $D$ samples (batch size), $\\mathbf{X} \\in \\mathbb{R}^{M \\times D}$, and $\\mathbf{Y} \\in \\mathbb{R}^{N \\times D}$, the goal of linear regression is to find $\\mathbf{W} \\in \\mathbb{R}^{N \\times M}$ such that:\n", "\n", "\\begin{equation}\n", "\\mathbf{Y} = \\mathbf{W} ~ \\mathbf{X}\n", "\\end{equation}\n", "\n", "Given the Squared Error loss function, we have:\n", "\n", "\\begin{equation}\n", "Loss(\\mathbf{W}) = ||\\mathbf{Y} - \\mathbf{W} ~ \\mathbf{X}||^2\n", "\\end{equation}\n", "\n", "So, using matrix notation, the optimization problem is given by:\n", "\n", "\\begin{align}\n", "\\mathbf{W^{*}} &= \\underset{\\mathbf{W}}{\\mathrm{argmin}} \\left( Loss (\\mathbf{W}) \\right) \\\\\n", " &= \\underset{\\mathbf{W}}{\\mathrm{argmin}} \\left( ||\\mathbf{Y} - \\mathbf{W} ~ \\mathbf{X}||^2 \\right) \\\\\n", "&= \\underset{\\mathbf{W}}{\\mathrm{argmin}} ~ \\mathrm{tr} \\left( \\left( \\mathbf{Y} - \\mathbf{W} ~ \\mathbf{X}\\right)^{\\top} \\left( \\mathbf{Y} - \\mathbf{W} ~ \\mathbf{X}\\right) \\right)\n", "\\end{align}\n", "\n", "To solve the minimization problem, we can simply set the derivative of the loss with respect to $\\mathbf{W}$ to zero.\n", "\n", "\\begin{equation}\n", "\\dfrac{\\partial Loss}{\\partial \\mathbf{W}} = 
0\n", "\\end{equation}\n", "\n", "Assuming that $\\mathbf{X}\\mathbf{X}^{\\top}$ is full-rank, and thus it is invertible, we can write:\n", "\n", "\\begin{equation}\n", "\\mathbf{W}^{\\mathbf{*}} = \\mathbf{Y} \\mathbf{X}^{\\top} \\left( \\mathbf{X} \\mathbf{X}^{\\top} \\right) ^{-1}\n", "\\end{equation}\n", "\n", "
\n", "\n", "**Note:** Here $||\\cdot||$ denotes the Euclidean (L2) norm; applied to the matrix $\\mathbf{Y} - \\mathbf{W} ~ \\mathbf{X}$, it is the Frobenius norm, i.e., the Euclidean norm of all of its entries." ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "### Coding Exercise 5.3.1: Analytical solution to LR\n", "\n", "Complete the function `linear_regression` for finding the analytical solution to linear regression." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "def linear_regression(X, Y):\n", " \"\"\"\n", " Analytical Linear regression\n", "\n", " Args:\n", " X: np.ndarray\n", " Design matrix of shape (M, D), one column per sample\n", " Y: np.ndarray\n", " Target outputs of shape (N, D), one column per sample\n", "\n", " Returns:\n", " W: np.ndarray\n", " Estimated weights (mapping)\n", " \"\"\"\n", " assert isinstance(X, np.ndarray)\n", " assert isinstance(Y, np.ndarray)\n", " M, Dx = X.shape\n", " N, Dy = Y.shape\n", " assert Dx == Dy\n", " #################################################\n", " ## Complete the linear_regression function\n", " # Complete the function and remove or comment the line below\n", " raise NotImplementedError(\"Linear Regression `linear_regression`\")\n", " #################################################\n", " W = ...\n", "\n", " return W\n", "\n", "\n", "W_true = np.random.randint(low=0, high=10, size=(3, 3)).astype(float)\n", "\n", "X_train = np.random.rand(3, 37) # 37 samples\n", "noise = np.random.normal(scale=0.01, size=(3, 37))\n", "Y_train = W_true @ X_train + noise\n", "\n", "## Uncomment and run\n", "# W_estimate = linear_regression(X_train, Y_train)\n", "# print(f\"True weights:\\n {W_true}\")\n", "# print(f\"\\nEstimated weights:\\n {np.round(W_estimate, 1)}\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "execution": {} }, "source": [ "[*Click for solution*](https://github.com/NeuromatchAcademy/course-content-dl/tree/main/tutorials/W1D2_LinearDeepLearning/solutions/W1D2_Tutorial3_Solution_aa3d6993.py)\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Submit 
your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_Analytical_Solution_to_LR_Exercise\")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "## Demonstration: Linear Regression vs. DLNN\n", "\n", "A linear neural network with NO hidden layer is very similar to linear regression in its core. We also know that no matter how many hidden layers a linear network has, it can be compressed to linear regression (no hidden layers).\n", "\n", "In this demonstration, we use the hierarchically structured data to:\n", "\n", "* analytically find the mapping between features and labels\n", "* train a zero-depth LNN to find the mapping\n", "* compare them to the $W_{tot}$ from the already trained deep LNN" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "# Sampling new data from the tree\n", "tree_labels, tree_features = generate_hsd()\n", "\n", "# Convert (cast) data from np.ndarray to torch.Tensor\n", "label_tensor = torch.tensor(tree_labels).float()\n", "feature_tensor = torch.tensor(tree_features).float()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "# Calculating the W_tot for deep network (already trained model)\n", "\n", "lr = 100.0 # Learning rate\n", "gamma = 1e-12 # Initialization scale\n", "n_epochs = 250 # Number of epochs\n", "dim_input = 8 # Input dimension = `label_tensor.size(1)`\n", "dim_hidden = 30 # Hidden neurons\n", "dim_output = 10000 # Output dimension = `feature_tensor.size(1)`\n", "\n", "# Model instantiation\n", "dlnn_model = LNNet(dim_input, dim_hidden, dim_output)\n", "\n", "# Weights re-initialization\n", "initializer_(dlnn_model, gamma)\n", "\n", "# Training\n", "losses, modes, rsms, ills = 
train(dlnn_model,\n", " label_tensor,\n", " feature_tensor,\n", " n_epochs=n_epochs,\n", " lr=lr)\n", "\n", "deep_W_tot = torch.eye(dim_input)\n", "for weight in dlnn_model.parameters():\n", " deep_W_tot = weight @ deep_W_tot\n", "deep_W_tot = deep_W_tot.detach().numpy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "# Analytical estimation of weights\n", "# First dimension of data is `batch`, so we need to transpose our data\n", "analytical_weights = linear_regression(tree_labels.T, tree_features.T)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "class LRNet(nn.Module):\n", " \"\"\"\n", " A Linear Neural Net with ZERO hidden layers (LR net)\n", " \"\"\"\n", "\n", " def __init__(self, in_dim, out_dim):\n", " \"\"\"\n", " Initialize LRNet\n", "\n", " Args:\n", " in_dim: int\n", " Input dimension\n", " out_dim: int\n", " Output dimension\n", "\n", " Returns:\n", " Nothing\n", " \"\"\"\n", " super().__init__()\n", " self.in_out = nn.Linear(in_dim, out_dim, bias=False)\n", "\n", " def forward(self, x):\n", " \"\"\"\n", " Forward pass of LRNet\n", "\n", " Args:\n", " x: torch.Tensor\n", " Input tensor\n", "\n", " Returns:\n", " out: torch.Tensor\n", " Output/Prediction\n", " \"\"\"\n", " out = self.in_out(x) # Output (Prediction)\n", " return out" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "lr = 1000.0 # Learning rate\n", "gamma = 1e-12 # Initialization scale\n", "n_epochs = 250 # Number of epochs\n", "dim_input = 8 # Input dimension = `label_tensor.size(1)`\n", "dim_output = 10000 # Output dimension = `feature_tensor.size(1)`\n", "\n", "# Model instantiation\n", "LR_model = LRNet(dim_input, dim_output)\n", "optimizer = optim.SGD(LR_model.parameters(), lr=lr)\n", "criterion = nn.MSELoss()\n", "\n", "losses = np.zeros(n_epochs) # Loss records\n", "for i in 
range(n_epochs): # Training loop\n", " optimizer.zero_grad()\n", " predictions = LR_model(label_tensor)\n", " loss = criterion(predictions, feature_tensor)\n", " loss.backward()\n", " optimizer.step()\n", " losses[i] = loss.item()\n", "\n", "# Trained weights from zero_depth_model\n", "LR_model_weights = next(iter(LR_model.parameters())).detach().numpy()\n", "\n", "plot_loss(losses, \"Training loss for zero depth LNN\", c=\"r\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": {} }, "outputs": [], "source": [ "print(\"The final weights from all methods are approximately equal?! \"\n", "\"{}!\".format(\n", " (np.allclose(analytical_weights, LR_model_weights, atol=1e-02) and \\\n", " np.allclose(analytical_weights, deep_W_tot, atol=1e-02))\n", " )\n", ")" ] }, { "cell_type": "markdown", "metadata": { "execution": {} }, "source": [ "As you may have guessed, they all arrive at the same results but through very different paths." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Video 10: Linear Regression - Discussion\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "remove-input" ] }, "outputs": [], "source": [ "# @title Video 10: Linear Regression - Discussion\n", "from ipywidgets import widgets\n", "from IPython.display import YouTubeVideo\n", "from IPython.display import IFrame\n", "from IPython.display import display\n", "\n", "\n", "class PlayVideo(IFrame):\n", " def __init__(self, id, source, page=1, width=400, height=300, **kwargs):\n", " self.id = id\n", " if source == 'Bilibili':\n", " src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'\n", " elif source == 'Osf':\n", " src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'\n", " super(PlayVideo, self).__init__(src, width, height, **kwargs)\n", "\n", "\n", "def display_videos(video_ids, W=400, H=300, fs=1):\n", " tab_contents = []\n", " 
for i, video_id in enumerate(video_ids):\n", " out = widgets.Output()\n", " with out:\n", " if video_ids[i][0] == 'Youtube':\n", " video = YouTubeVideo(id=video_ids[i][1], width=W,\n", " height=H, fs=fs, rel=0)\n", " print(f'Video available at https://youtube.com/watch?v={video.id}')\n", " else:\n", " video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,\n", " height=H, fs=fs, autoplay=False)\n", " if video_ids[i][0] == 'Bilibili':\n", " print(f'Video available at https://www.bilibili.com/video/{video.id}')\n", " elif video_ids[i][0] == 'Osf':\n", " print(f'Video available at https://osf.io/{video.id}')\n", " display(video)\n", " tab_contents.append(out)\n", " return tab_contents\n", "\n", "\n", "video_ids = [('Youtube', 'gG15_J0i05Y'), ('Bilibili', 'BV18v411E7Wg')]\n", "tab_contents = display_videos(video_ids, W=730, H=410)\n", "tabs = widgets.Tab()\n", "tabs.children = tab_contents\n", "for i in range(len(tab_contents)):\n", " tabs.set_title(i, video_ids[i][0])\n", "display(tabs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Submit your feedback\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "execution": {}, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# @title Submit your feedback\n", "content_review(f\"{feedback_prefix}_Linear_Regression_Discussion_Video\")" ] } ], "metadata": { "colab": { "collapsed_sections": [], "include_colab_link": true, "name": "W1D2_Tutorial3", "provenance": [], "toc_visible": true }, "kernel": { "display_name": "Python 3", "language": "python", "name": "python3" }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }