Skip to content
This repository was archived by the owner on Aug 28, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def __init__(self, size, std=0.1):
"""
Inputs:
size - Number of data points we want to generate
std - Standard deviation of the noise (see generate_continuous_xor function)
std - Standard deviation of the noise (see generate_continuous_xor function).
"""
super().__init__()
self.size = size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def get_grads(act_fn, x):
Args:
act_fn: An object of the class "ActivationFunction" with an implemented forward pass.
x: 1D input tensor.

Returns:
A tensor with the same size as x containing the gradients of act_fn at x.
"""
Expand Down Expand Up @@ -282,7 +283,7 @@ def __init__(self, act_fn, input_size=784, num_classes=10, hidden_sizes=[512, 25
act_fn: Object of the activation function that should be used as non-linearity in the network.
input_size: Size of the input images in pixels
num_classes: Number of classes we want to predict
hidden_sizes: A list of integers specifying the hidden layer sizes in the NN
hidden_sizes: A list of integers specifying the hidden layer sizes in the NN.
"""
super().__init__()

Expand Down Expand Up @@ -432,7 +433,7 @@ def visualize_gradients(net, color="C0"):
"""
Args:
net: Object of class BaseNetwork
color: Color in which we want to visualize the histogram (for easier separation of activation functions)
color: Color in which we want to visualize the histogram (for easier separation of activation functions).
"""
net.eval()
small_loader = data.DataLoader(train_set, batch_size=256, shuffle=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def __init__(self, act_fn, input_size=784, num_classes=10, hidden_sizes=[512, 25
act_fn: Object of the activation function that should be used as non-linearity in the network.
input_size: Size of the input images in pixels
num_classes: Number of classes we want to predict
hidden_sizes: A list of integers specifying the hidden layer sizes in the NN
hidden_sizes: A list of integers specifying the hidden layer sizes in the NN.
"""
super().__init__()

Expand Down Expand Up @@ -258,7 +258,7 @@ def visualize_gradients(model, color="C0", print_variance=False):
"""
Args:
model: Object of class BaseNetwork
color: Color in which we want to visualize the histogram (for easier separation of activation functions)
color: Color in which we want to visualize the histogram (for easier separation of activation functions).
"""
model.eval()
small_loader = data.DataLoader(train_set, batch_size=1024, shuffle=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def __init__(self, c_in, c_red: dict, c_out: dict, act_fn):
c_in - Number of input feature maps from the previous layers
c_red - Dictionary with keys "3x3" and "5x5" specifying the output of the dimensionality reducing 1x1 convolutions
c_out - Dictionary with keys "1x1", "3x3", "5x5", and "max"
act_fn - Activation class constructor (e.g. nn.ReLU)
act_fn - Activation class constructor (e.g. nn.ReLU).
"""
super().__init__()

Expand Down Expand Up @@ -670,7 +670,7 @@ def __init__(self, c_in, act_fn, subsample=False, c_out=-1):
c_in - Number of input features
act_fn - Activation class constructor (e.g. nn.ReLU)
subsample - If True, we want to apply a stride inside the block and reduce the output shape by 2 in height and width
c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in
c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in.
"""
super().__init__()
if not subsample:
Expand Down Expand Up @@ -715,7 +715,7 @@ def __init__(self, c_in, act_fn, subsample=False, c_out=-1):
c_in - Number of input features
act_fn - Activation class constructor (e.g. nn.ReLU)
subsample - If True, we want to apply a stride inside the block and reduce the output shape by 2 in height and width
c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in
c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in.
"""
super().__init__()
if not subsample:
Expand Down Expand Up @@ -785,7 +785,7 @@ def __init__(
num_blocks - List with the number of ResNet blocks to use. The first block of each group uses downsampling, except the first.
c_hidden - List with the hidden dimensionalities in the different blocks. Usually multiplied by 2 the deeper we go.
act_fn_name - Name of the activation function to use, looked up in "act_fn_by_name"
block_name - Name of the ResNet block, looked up in "resnet_blocks_by_name"
block_name - Name of the ResNet block, looked up in "resnet_blocks_by_name".
"""
super().__init__()
assert block_name in resnet_blocks_by_name
Expand Down Expand Up @@ -953,7 +953,7 @@ def __init__(self, c_in, bn_size, growth_rate, act_fn):
c_in - Number of input channels
bn_size - Bottleneck size (factor of growth rate) for the output of the 1x1 convolution. Typically between 2 and 4.
growth_rate - Number of output channels of the 3x3 convolution
act_fn - Activation class constructor (e.g. nn.ReLU)
act_fn - Activation class constructor (e.g. nn.ReLU).
"""
super().__init__()
self.net = nn.Sequential(
Expand Down Expand Up @@ -985,7 +985,7 @@ def __init__(self, c_in, num_layers, bn_size, growth_rate, act_fn):
num_layers - Number of dense layers to apply in the block
bn_size - Bottleneck size to use in the dense layers
growth_rate - Growth rate to use in the dense layers
act_fn - Activation function to use in the dense layers
act_fn - Activation function to use in the dense layers.
"""
super().__init__()
layers = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def __init__(self, input_dim, num_heads, dim_feedforward, dropout=0.0):
input_dim: Dimensionality of the input
num_heads: Number of heads to use in the attention block
dim_feedforward: Dimensionality of the hidden layer in the MLP
dropout: Dropout probability to use in the dropout layers
dropout: Dropout probability to use in the dropout layers.
"""
super().__init__()

Expand Down Expand Up @@ -573,7 +573,7 @@ def get_attention_maps(self, x, mask=None):
class PositionalEncoding(nn.Module):
def __init__(self, d_model, max_len=5000):
"""
Args
Args:
d_model: Hidden dimensionality of the input.
max_len: Maximum length of a sequence to expect.
"""
Expand Down Expand Up @@ -769,7 +769,7 @@ def __init__(
warmup: Number of warmup steps. Usually between 50 and 500
max_iters: Number of maximum iterations the model is trained for. This is needed for the CosineWarmup scheduler
dropout: Dropout to apply inside the model
input_dropout: Dropout to apply on the input features
input_dropout: Dropout to apply on the input features.
"""
super().__init__()
self.save_hyperparameters()
Expand Down
16 changes: 8 additions & 8 deletions course_UvA-DL/06-graph-neural-networks/GNN_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def forward(self, node_feats, adj_matrix):
adj_matrix: Batch of adjacency matrices of the graph. If there is an edge from i to j,
adj_matrix[b,i,j]=1 else 0. Supports directed edges by non-symmetric matrices.
Assumes the identity connections have already been added.
Shape: [batch_size, num_nodes, num_nodes]
Shape: [batch_size, num_nodes, num_nodes].
"""
# Num neighbours = number of incoming edges
num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)
Expand Down Expand Up @@ -322,7 +322,7 @@ def forward(self, node_feats, adj_matrix, print_attn_probs=False):
node_feats: Input features of the nodes. Shape: [batch_size, num_nodes, c_in]
adj_matrix: Adjacency matrix including self-connections. Shape: [batch_size, num_nodes, num_nodes]
print_attn_probs: If True, the attention weights are printed during the forward pass
(for debugging purposes)
(for debugging purposes).
"""
batch_size, num_nodes = node_feats.size(0), node_feats.size(1)

Expand Down Expand Up @@ -505,7 +505,7 @@ def __init__(
num_layers: Number of "hidden" graph layers
layer_name: String of the graph layer to use
dp_rate: Dropout rate to apply throughout the network
kwargs: Additional arguments for the graph layer (e.g. number of heads for GAT)
kwargs: Additional arguments for the graph layer (e.g. number of heads for GAT).
"""
super().__init__()
gnn_layer = gnn_layer_by_name[layer_name]
Expand All @@ -526,7 +526,7 @@ def forward(self, x, edge_index):
"""
Args:
x: Input features per node
edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation).
"""
for layer in self.layers:
# For graph layers, we need to add the "edge_index" tensor as additional input
Expand Down Expand Up @@ -555,7 +555,7 @@ def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
c_hidden: Dimension of hidden features
c_out: Dimension of the output features. Usually number of classes in classification
num_layers: Number of hidden layers
dp_rate: Dropout rate to apply throughout the network
dp_rate: Dropout rate to apply throughout the network.
"""
super().__init__()
layers = []
Expand All @@ -569,7 +569,7 @@ def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
def forward(self, x, *args, **kwargs):
"""
Args:
x: Input features per node
x: Input features per node.
"""
return self.layers(x)

Expand Down Expand Up @@ -849,7 +849,7 @@ def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):
c_hidden: Dimension of hidden features
c_out: Dimension of output features (usually number of classes)
dp_rate_linear: Dropout rate before the linear layer (usually much higher than inside the GNN)
kwargs: Additional arguments for the GNNModel object
kwargs: Additional arguments for the GNNModel object.
"""
super().__init__()
self.GNN = GNNModel(c_in=c_in, c_hidden=c_hidden, c_out=c_hidden, **kwargs) # Not our prediction output yet!
Expand All @@ -860,7 +860,7 @@ def forward(self, x, edge_index, batch_idx):
Args:
x: Input features per node
edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
batch_idx: Index of batch element for each node
batch_idx: Index of batch element for each node.
"""
x = self.GNN(x, edge_index)
x = geom_nn.global_mean_pool(x, batch_idx) # Average pooling
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def __init__(self, model, img_shape, sample_size, max_len=8192):
model: Neural network to use for modeling E_theta
img_shape: Shape of the images to model
sample_size: Batch size of the samples
max_len: Maximum number of data points to keep in the buffer
max_len: Maximum number of data points to keep in the buffer.
"""
super().__init__()
self.model = model
Expand Down
6 changes: 3 additions & 3 deletions course_UvA-DL/08-deep-autoencoders/Deep_Autoencoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def __init__(self, num_input_channels: int, base_channel_size: int, latent_dim:
num_input_channels : Number of input channels of the image. For CIFAR, this parameter is 3
base_channel_size : Number of channels we use in the first convolutional layers. Deeper layers might use a duplicate of it.
latent_dim : Dimensionality of latent representation z
act_fn : Activation function used throughout the encoder network
act_fn : Activation function used throughout the encoder network.
"""
super().__init__()
c_hid = base_channel_size
Expand Down Expand Up @@ -195,7 +195,7 @@ def __init__(self, num_input_channels: int, base_channel_size: int, latent_dim:
num_input_channels : Number of channels of the image to reconstruct. For CIFAR, this parameter is 3
base_channel_size : Number of channels we use in the last convolutional layers. Early layers might use a duplicate of it.
latent_dim : Dimensionality of latent representation z
act_fn : Activation function used throughout the decoder network
act_fn : Activation function used throughout the decoder network.
"""
super().__init__()
c_hid = base_channel_size
Expand Down Expand Up @@ -263,7 +263,7 @@ def forward(self, x):
return x_hat

def _get_reconstruction_loss(self, batch):
"""Given a batch of images, this function returns the reconstruction loss (MSE in our case)"""
"""Given a batch of images, this function returns the reconstruction loss (MSE in our case)."""
x, _ = batch # We do not need the labels
x_hat = self.forward(x)
loss = F.mse_loss(x, x_hat, reduction="none")
Expand Down
15 changes: 8 additions & 7 deletions course_UvA-DL/09-normalizing-flows/NF_image_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def __init__(self, flows, import_samples=8):
"""
Args:
flows: A list of flows (each a nn.Module) that should be applied on the images.
import_samples: Number of importance samples to use during testing (see explanation below). Can be changed at any time
import_samples: Number of importance samples to use during testing (see explanation below). Can be changed at any time.
"""
super().__init__()
self.flows = nn.ModuleList(flows)
Expand Down Expand Up @@ -404,7 +404,7 @@ def __init__(self, alpha=1e-5, quants=256):
Args:
alpha: small constant that is used to scale the original input.
Prevents dealing with values very close to 0 and 1 when inverting the sigmoid
quants: Number of possible discrete values (usually 256 for 8-bit image)
quants: Number of possible discrete values (usually 256 for 8-bit image).
"""
super().__init__()
self.alpha = alpha
Expand Down Expand Up @@ -590,7 +590,7 @@ def __init__(self, var_flows, alpha=1e-5):
"""
Args:
var_flows: A list of flow transformations to use for modeling q(u|x)
alpha: Small constant, see Dequantization for details
alpha: Small constant, see Dequantization for details.
"""
super().__init__(alpha=alpha)
self.flows = nn.ModuleList(var_flows)
Expand Down Expand Up @@ -679,7 +679,7 @@ def forward(self, z, ldj, reverse=False, orig_img=None):
The ldj of this layer will be added to this tensor.
reverse: If True, we apply the inverse of the layer.
orig_img (optional): Only needed in VarDeq. Allows external
input to condition the flow on (e.g. original image)
input to condition the flow on (e.g. original image).
"""
# Apply network to masked input
z_in = z * self.mask
Expand Down Expand Up @@ -802,6 +802,7 @@ def __init__(self, c_in):
"""This module applies layer norm across channels in an image.

Has been shown to work well with ResNet connections.

Args:
c_in: Number of channels of the input
"""
Expand All @@ -821,7 +822,7 @@ def __init__(self, c_in, c_hidden):
This module applies a two-layer convolutional ResNet block with input gate.
Args:
c_in: Number of channels of the input
c_hidden: Number of hidden dimensions we want to model (usually similar to c_in)
c_hidden: Number of hidden dimensions we want to model (usually similar to c_in).
"""
super().__init__()
self.net = nn.Sequential(
Expand Down Expand Up @@ -1249,7 +1250,7 @@ def interpolate(model, img1, img2, num_steps=8):
Args:
model: object of ImageFlow class that represents the (trained) flow model
img1, img2: Image tensors of shape [1, 28, 28]. The two images to interpolate between.
num_steps: Number of interpolation steps. 8 interpolation steps mean 6 intermediate pictures besides img1 and img2
num_steps: Number of interpolation steps. 8 interpolation steps mean 6 intermediate pictures besides img1 and img2.
"""
imgs = torch.stack([img1, img2], dim=0).to(model.device)
z, _ = model.encode(imgs)
Expand Down Expand Up @@ -1322,7 +1323,7 @@ def visualize_dequant_distribution(model: ImageFlow, imgs: Tensor, title: str =
"""
Args:
model: The flow of which we want to visualize the dequantization distribution
imgs: Example training images of which we want to visualize the dequantization distribution
imgs: Example training images of which we want to visualize the dequantization distribution.
"""
imgs = imgs.to(device)
ldj = torch.zeros(imgs.shape[0], dtype=torch.float32).to(device)
Expand Down
4 changes: 2 additions & 2 deletions course_UvA-DL/11-vision-transformer/Vision_Transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def __init__(self, embed_dim, hidden_dim, num_heads, dropout=0.0):
hidden_dim - Dimensionality of hidden layer in feed-forward network
(usually 2-4x larger than embed_dim)
num_heads - Number of heads to use in the Multi-Head Attention block
dropout - Amount of dropout to apply in the feed-forward network
dropout - Amount of dropout to apply in the feed-forward network.
"""
super().__init__()

Expand Down Expand Up @@ -280,7 +280,7 @@ def __init__(
patch_size - Number of pixels that the patches have per dimension
num_patches - Maximum number of patches an image can have
dropout - Amount of dropout to apply in the feed-forward network and
on the input encoding
on the input encoding.
"""
super().__init__()

Expand Down
4 changes: 2 additions & 2 deletions course_UvA-DL/12-meta-learning/Meta_Learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def __init__(self, dataset_targets, N_way, K_shot, include_query=False, shuffle=
iteration (for training)
shuffle_once - If True, examples and classes are shuffled once in
the beginning, but kept constant across iterations
(for validation)
(for validation).
"""
super().__init__()
self.dataset_targets = dataset_targets
Expand Down Expand Up @@ -977,7 +977,7 @@ def __init__(self, dataset_targets, batch_size, N_way, K_shot, include_query=Fal
the implementation of sampling the same classes but
distinct examples for support and query set.
shuffle - If True, examples and classes are newly shuffled in each
iteration (for training)
iteration (for training).
"""
super().__init__()
self.batch_sampler = FewShotBatchSampler(dataset_targets, N_way, K_shot, include_query, shuffle)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,8 @@ def _convert_to_features(self, example_batch: datasets.arrow_dataset.Batch) -> B
# %%
class RteBoolqModule(pl.LightningModule):
"""A ``LightningModule`` that can be used to fine-tune a foundational model on either the RTE or BoolQ
SuperGLUE tasks using Hugging Face implementations of a given model and the SuperGLUE Hugging Face dataset."""
SuperGLUE tasks using Hugging Face implementations of a given model and the SuperGLUE Hugging Face dataset.
"""

def __init__(
self,
Expand Down
Loading