This repository was archived by the owner on Aug 28, 2025. It is now read-only.
Merged
docs
Borda committed Mar 24, 2023
commit 50408c6e29f2556b5d690857130338bac6f5c809
@@ -601,10 +601,11 @@ def forward(self, x):

 class XORDataset(data.Dataset):
     def __init__(self, size, std=0.1):
-        """
-        Inputs:
-            size - Number of data points we want to generate
-            std - Standard deviation of the noise (see generate_continuous_xor function).
+        """XORDataset.
+
+        Args:
+            size: Number of data points we want to generate
+            std: Standard deviation of the noise (see generate_continuous_xor function)
         """
         super().__init__()
         self.size = size
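A minimal sketch of the kind of data the docstring above describes, with illustrative values (the actual generate_continuous_xor implementation is not shown here):

import torch

# Binary input pairs with label = XOR, plus Gaussian noise with standard deviation `std`.
size, std = 200, 0.1
data = torch.randint(0, 2, size=(size, 2), dtype=torch.float32)
label = (data.sum(dim=1) == 1).to(torch.long)  # XOR: exactly one of the two inputs is 1
data = data + std * torch.randn(data.shape)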
course_UvA-DL/02-activation-functions/Activation_Functions.py (10 changes: 6 additions & 4 deletions)
@@ -278,12 +278,13 @@ def vis_act_fn(act_fn, ax, x):
 # %%
 class BaseNetwork(nn.Module):
     def __init__(self, act_fn, input_size=784, num_classes=10, hidden_sizes=[512, 256, 256, 128]):
-        """
+        """Base Network
+
         Args:
             act_fn: Object of the activation function that should be used as non-linearity in the network.
             input_size: Size of the input images in pixels
             num_classes: Number of classes we want to predict
-            hidden_sizes: A list of integers specifying the hidden layer sizes in the NN.
+            hidden_sizes: A list of integers specifying the hidden layer sizes in the NN
         """
         super().__init__()

@@ -430,10 +431,11 @@ def save_model(model, model_path, model_name):

 # %%
 def visualize_gradients(net, color="C0"):
-    """
+    """Visualize gradients
+
     Args:
         net: Object of class BaseNetwork
-        color: Color in which we want to visualize the histogram (for easier separation of activation functions).
+        color: Color in which we want to visualize the histogram (for easier separation of activation functions)
     """
     net.eval()
     small_loader = data.DataLoader(train_set, batch_size=256, shuffle=False)
(next file)
@@ -151,12 +151,13 @@
 # %%
 class BaseNetwork(nn.Module):
     def __init__(self, act_fn, input_size=784, num_classes=10, hidden_sizes=[512, 256, 256, 128]):
-        """
+        """Base Network
+
         Args:
             act_fn: Object of the activation function that should be used as non-linearity in the network.
             input_size: Size of the input images in pixels
             num_classes: Number of classes we want to predict
-            hidden_sizes: A list of integers specifying the hidden layer sizes in the NN.
+            hidden_sizes: A list of integers specifying the hidden layer sizes in the NN
         """
         super().__init__()

@@ -258,7 +259,7 @@ def visualize_gradients(model, color="C0", print_variance=False):
     """
     Args:
         net: Object of class BaseNetwork
-        color: Color in which we want to visualize the histogram (for easier separation of activation functions).
+        color: Color in which we want to visualize the histogram (for easier separation of activation functions)
     """
     model.eval()
     small_loader = data.DataLoader(train_set, batch_size=1024, shuffle=False)
(next file)
@@ -417,12 +417,13 @@ def train_model(model_name, save_name=None, **kwargs):
 # %%
 class InceptionBlock(nn.Module):
     def __init__(self, c_in, c_red: dict, c_out: dict, act_fn):
-        """
-        Inputs:
+        """InceptionBlock
+
+        Args:
             c_in - Number of input feature maps from the previous layers
             c_red - Dictionary with keys "3x3" and "5x5" specifying the output of the dimensionality reducing 1x1 convolutions
             c_out - Dictionary with keys "1x1", "3x3", "5x5", and "max"
-            act_fn - Activation class constructor (e.g. nn.ReLU).
+            act_fn - Activation class constructor (e.g. nn.ReLU)
         """
         super().__init__()
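For illustration, a hypothetical instantiation with made-up channel numbers (InceptionBlock itself is defined in the tutorial file, not reproduced here):

from torch import nn

# Hypothetical values for the c_red and c_out dictionaries documented above.
inception = InceptionBlock(
    c_in=64,
    c_red={"3x3": 32, "5x5": 16},
    c_out={"1x1": 16, "3x3": 32, "5x5": 8, "max": 8},
    act_fn=nn.ReLU,
)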

@@ -666,12 +667,13 @@ def forward(self, x):

 class ResNetBlock(nn.Module):
     def __init__(self, c_in, act_fn, subsample=False, c_out=-1):
-        """
+        """ResNetBlock
+
         Inputs:
             c_in - Number of input features
             act_fn - Activation class constructor (e.g. nn.ReLU)
             subsample - If True, we want to apply a stride inside the block and reduce the output shape by 2 in height and width
-            c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in.
+            c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in
         """
         super().__init__()
         if not subsample:
@@ -711,12 +713,13 @@ def forward(self, x):
 # %%
 class PreActResNetBlock(nn.Module):
     def __init__(self, c_in, act_fn, subsample=False, c_out=-1):
-        """
+        """PreAct ResNet Block
+
         Inputs:
             c_in - Number of input features
             act_fn - Activation class constructor (e.g. nn.ReLU)
             subsample - If True, we want to apply a stride inside the block and reduce the output shape by 2 in height and width
-            c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in.
+            c_out - Number of output features. Note that this is only relevant if subsample is True, as otherwise, c_out = c_in
         """
         super().__init__()
         if not subsample:
@@ -780,13 +783,14 @@ def __init__(
         block_name="ResNetBlock",
         **kwargs,
     ):
-        """
+        """ResNet
+
         Inputs:
             num_classes - Number of classification outputs (10 for CIFAR10)
             num_blocks - List with the number of ResNet blocks to use. The first block of each group uses downsampling, except the first.
             c_hidden - List with the hidden dimensionalities in the different blocks. Usually multiplied by 2 the deeper we go.
             act_fn_name - Name of the activation function to use, looked up in "act_fn_by_name"
-            block_name - Name of the ResNet block, looked up in "resnet_blocks_by_name".
+            block_name - Name of the ResNet block, looked up in "resnet_blocks_by_name"
         """
         super().__init__()
         assert block_name in resnet_blocks_by_name
@@ -949,12 +953,13 @@ def forward(self, x):
 # %%
 class DenseLayer(nn.Module):
     def __init__(self, c_in, bn_size, growth_rate, act_fn):
-        """
+        """DenseLayer
+
         Inputs:
             c_in - Number of input channels
             bn_size - Bottleneck size (factor of growth rate) for the output of the 1x1 convolution. Typically between 2 and 4.
             growth_rate - Number of output channels of the 3x3 convolution
-            act_fn - Activation class constructor (e.g. nn.ReLU).
+            act_fn - Activation class constructor (e.g. nn.ReLU)
         """
         super().__init__()
         self.net = nn.Sequential(
@@ -980,13 +985,14 @@ def forward(self, x):
 # %%
 class DenseBlock(nn.Module):
     def __init__(self, c_in, num_layers, bn_size, growth_rate, act_fn):
-        """
+        """Dense Block
+
         Inputs:
             c_in - Number of input channels
             num_layers - Number of dense layers to apply in the block
             bn_size - Bottleneck size to use in the dense layers
             growth_rate - Growth rate to use in the dense layers
-            act_fn - Activation function to use in the dense layers.
+            act_fn - Activation function to use in the dense layers
         """
         super().__init__()
         layers = []
(next file)
@@ -463,12 +463,13 @@ def forward(self, x, mask=None, return_attention=False):
 # %%
 class EncoderBlock(nn.Module):
     def __init__(self, input_dim, num_heads, dim_feedforward, dropout=0.0):
-        """
+        """EncoderBlock
+
         Args:
             input_dim: Dimensionality of the input
             num_heads: Number of heads to use in the attention block
             dim_feedforward: Dimensionality of the hidden layer in the MLP
-            dropout: Dropout probability to use in the dropout layers.
+            dropout: Dropout probability to use in the dropout layers
         """
         super().__init__()

@@ -572,7 +573,8 @@ def get_attention_maps(self, x, mask=None):
 # %%
 class PositionalEncoding(nn.Module):
     def __init__(self, d_model, max_len=5000):
-        """
+        """Positional Encoding
+
         Args:
             d_model: Hidden dimensionality of the input.
             max_len: Maximum length of a sequence to expect.
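For context, a minimal sketch of the standard sinusoidal table such a module precomputes from d_model and max_len (assumed formulation; the class body is not shown here):

import math
import torch

# Standard sinusoidal positional-encoding table from "Attention Is All You Need".
d_model, max_len = 16, 5000
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
# pe[t] is added to the embedding of the token at position t (broadcast over the batch).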
@@ -758,7 +760,8 @@ def __init__(
         dropout=0.0,
         input_dropout=0.0,
     ):
-        """
+        """TransformerPredictor
+
         Args:
             input_dim: Hidden dimensionality of the input
             model_dim: Hidden dimensionality to use inside the Transformer
@@ -769,7 +772,7 @@ def __init__(
             warmup: Number of warmup steps. Usually between 50 and 500
             max_iters: Number of maximum iterations the model is trained for. This is needed for the CosineWarmup scheduler
             dropout: Dropout to apply inside the model
-            input_dropout: Dropout to apply on the input features.
+            input_dropout: Dropout to apply on the input features
         """
         super().__init__()
         self.save_hyperparameters()
course_UvA-DL/06-graph-neural-networks/GNN_overview.py (40 changes: 24 additions & 16 deletions)
@@ -162,13 +162,14 @@ def __init__(self, c_in, c_out):
         self.projection = nn.Linear(c_in, c_out)

     def forward(self, node_feats, adj_matrix):
-        """
+        """forward
+
         Args:
             node_feats: Tensor with node features of shape [batch_size, num_nodes, c_in]
             adj_matrix: Batch of adjacency matrices of the graph. If there is an edge from i to j,
                 adj_matrix[b,i,j]=1 else 0. Supports directed edges by non-symmetric matrices.
                 Assumes to already have added the identity connections.
-                Shape: [batch_size, num_nodes, num_nodes].
+                Shape: [batch_size, num_nodes, num_nodes]
         """
         # Num neighbours = number of incoming edges
         num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)
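A minimal sketch of the adjacency-matrix convention described above, with a toy 4-node graph (illustrative values only):

import torch

# Undirected edges 0-1, 1-2, 2-3 plus identity connections, shape [1, 4, 4].
adj_matrix = torch.eye(4).unsqueeze(0)
for i, j in [(0, 1), (1, 2), (2, 3)]:
    adj_matrix[0, i, j] = adj_matrix[0, j, i] = 1.0

node_feats = torch.arange(8, dtype=torch.float32).view(1, 4, 2)
num_neighbours = adj_matrix.sum(dim=-1, keepdim=True)
avg_feats = torch.bmm(adj_matrix, node_feats) / num_neighbours  # mean over neighbours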
@@ -317,12 +318,13 @@ def __init__(self, c_in, c_out, num_heads=1, concat_heads=True, alpha=0.2):
         nn.init.xavier_uniform_(self.a.data, gain=1.414)

     def forward(self, node_feats, adj_matrix, print_attn_probs=False):
-        """
+        """forward
+
         Args:
             node_feats: Input features of the node. Shape: [batch_size, c_in]
             adj_matrix: Adjacency matrix including self-connections. Shape: [batch_size, num_nodes, num_nodes]
             print_attn_probs: If True, the attention weights are printed during the forward pass
-                (for debugging purposes).
+                (for debugging purposes)
         """
         batch_size, num_nodes = node_feats.size(0), node_feats.size(1)

@@ -497,15 +499,16 @@ def __init__(
         dp_rate=0.1,
         **kwargs,
     ):
-        """
+        """GNNModel
+
         Args:
             c_in: Dimension of input features
             c_hidden: Dimension of hidden features
             c_out: Dimension of the output features. Usually number of classes in classification
             num_layers: Number of "hidden" graph layers
             layer_name: String of the graph layer to use
             dp_rate: Dropout rate to apply throughout the network
-            kwargs: Additional arguments for the graph layer (e.g. number of heads for GAT).
+            kwargs: Additional arguments for the graph layer (e.g. number of heads for GAT)
         """
         super().__init__()
         gnn_layer = gnn_layer_by_name[layer_name]
@@ -523,10 +526,11 @@ def __init__(
         self.layers = nn.ModuleList(layers)

     def forward(self, x, edge_index):
-        """
+        """forward
+
         Args:
             x: Input features per node
-            edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation).
+            edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
         """
         for layer in self.layers:
             # For graph layers, we need to add the "edge_index" tensor as additional input
@@ -549,13 +553,14 @@ def forward(self, x, edge_index):
 # %%
 class MLPModel(nn.Module):
     def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
-        """
+        """MLPModel
+
         Args:
             c_in: Dimension of input features
             c_hidden: Dimension of hidden features
             c_out: Dimension of the output features. Usually number of classes in classification
             num_layers: Number of hidden layers
-            dp_rate: Dropout rate to apply throughout the network.
+            dp_rate: Dropout rate to apply throughout the network
         """
         super().__init__()
         layers = []
@@ -567,9 +572,10 @@ def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
         self.layers = nn.Sequential(*layers)

     def forward(self, x, *args, **kwargs):
-        """
+        """forward
+
         Args:
-            x: Input features per node.
+            x: Input features per node
         """
         return self.layers(x)

@@ -844,24 +850,26 @@ def print_results(result_dict):
 # %%
 class GraphGNNModel(nn.Module):
     def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):
-        """
+        """GraphGNNModel
+
         Args:
             c_in: Dimension of input features
             c_hidden: Dimension of hidden features
             c_out: Dimension of output features (usually number of classes)
             dp_rate_linear: Dropout rate before the linear layer (usually much higher than inside the GNN)
-            kwargs: Additional arguments for the GNNModel object.
+            kwargs: Additional arguments for the GNNModel object
         """
         super().__init__()
         self.GNN = GNNModel(c_in=c_in, c_hidden=c_hidden, c_out=c_hidden, **kwargs)  # Not our prediction output yet!
         self.head = nn.Sequential(nn.Dropout(dp_rate_linear), nn.Linear(c_hidden, c_out))

     def forward(self, x, edge_index, batch_idx):
-        """
+        """forward
+
         Args:
             x: Input features per node
             edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
-            batch_idx: Index of batch element for each node.
+            batch_idx: Index of batch element for each node
         """
         x = self.GNN(x, edge_index)
         x = geom_nn.global_mean_pool(x, batch_idx)  # Average pooling
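A minimal sketch of the "PyTorch geometric notation" referenced in these docstrings, assuming the GNNModel class from this file and a GCN layer registered under layer_name="GCN":

import torch

# Toy graph: edges 0<->1 and 1<->2 as a [2, num_edges] tensor of (source, target) indices.
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.randn(3, 16)  # 3 nodes with 16 features each

model = GNNModel(c_in=16, c_hidden=32, c_out=4, layer_name="GCN")
out = model(x, edge_index)  # expected shape: [3, 4]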
(next file)
@@ -335,12 +335,13 @@ def forward(self, x):
 # %%
 class Sampler:
     def __init__(self, model, img_shape, sample_size, max_len=8192):
-        """
+        """Sampler
+
         Args:
             model: Neural network to use for modeling E_theta
             img_shape: Shape of the images to model
             sample_size: Batch size of the samples
-            max_len: Maximum number of data points to keep in the buffer.
+            max_len: Maximum number of data points to keep in the buffer
         """
         super().__init__()
         self.model = model
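A rough sketch of the replay-buffer idea that max_len bounds, with assumed image shape and restart fraction (not the Sampler's actual implementation):

import random
import torch

# Keep at most `max_len` previous samples; each step, restart a small fraction from
# noise and draw the rest from the buffer of earlier samples.
sample_size, max_len = 64, 8192
buffer = [torch.rand(1, 1, 28, 28) * 2 - 1 for _ in range(128)]

n_new = max(1, int(0.05 * sample_size))
new_imgs = torch.rand(n_new, 1, 28, 28) * 2 - 1
old_imgs = torch.cat(random.choices(buffer, k=sample_size - n_new), dim=0)
start_imgs = torch.cat([new_imgs, old_imgs], dim=0)

buffer = list(start_imgs.split(1, dim=0)) + buffer  # prepend new samples
buffer = buffer[:max_len]                           # enforce the max_len bound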
course_UvA-DL/08-deep-autoencoders/Deep_Autoencoders.py (10 changes: 6 additions & 4 deletions)
@@ -131,12 +131,13 @@ def get_train_images(num):
 # %%
 class Encoder(nn.Module):
     def __init__(self, num_input_channels: int, base_channel_size: int, latent_dim: int, act_fn: object = nn.GELU):
-        """
+        """Encoder
+
         Args:
             num_input_channels : Number of input channels of the image. For CIFAR, this parameter is 3
             base_channel_size : Number of channels we use in the first convolutional layers. Deeper layers might use a duplicate of it.
             latent_dim : Dimensionality of latent representation z
-            act_fn : Activation function used throughout the encoder network.
+            act_fn : Activation function used throughout the encoder network
         """
         super().__init__()
         c_hid = base_channel_size
@@ -190,12 +191,13 @@ def forward(self, x):
 # %%
 class Decoder(nn.Module):
     def __init__(self, num_input_channels: int, base_channel_size: int, latent_dim: int, act_fn: object = nn.GELU):
-        """
+        """Decoder
+
         Args:
             num_input_channels : Number of channels of the image to reconstruct. For CIFAR, this parameter is 3
             base_channel_size : Number of channels we use in the last convolutional layers. Early layers might use a duplicate of it.
             latent_dim : Dimensionality of latent representation z
-            act_fn : Activation function used throughout the decoder network.
+            act_fn : Activation function used throughout the decoder network
         """
         super().__init__()
         c_hid = base_channel_size
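A hypothetical round trip through the two modules, assuming the Encoder and Decoder classes from Deep_Autoencoders.py and made-up hyperparameters:

import torch

# Hypothetical hyperparameters; the tutorial's own defaults may differ.
encoder = Encoder(num_input_channels=3, base_channel_size=32, latent_dim=128)
decoder = Decoder(num_input_channels=3, base_channel_size=32, latent_dim=128)

x = torch.randn(8, 3, 32, 32)  # a batch of CIFAR-sized images
z = encoder(x)                 # latent codes, expected shape [8, 128]
x_hat = decoder(z)             # reconstructions, expected shape [8, 3, 32, 32]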