Commit ef5ad38

Make activation function configurable as argument (#60)
* 🔧 Make activation function configurable as argument
* 🔨 Fixing format and lint code
* 🔨 Fixing unsorted imports
* 🔨 Remove extra whitespace
* 🔨 Fixing format and lint code
* 📦 Support string-based activations for all blocks
* ➖ Remove unused package
* 🔨 Remove extra whitespace
* 🔨 Replace lambda assignments with defs in blocks to satisfy E731
* 🔨 Fixing ruff format
* ♻️ Use class returned from dictionary directly so that we can pass arguments when we instantiate it. This is necessary to reproduce previous behaviour.

---------

Co-authored-by: James Robinson <[email protected]>
1 parent 2626f03 commit ef5ad38

File tree

4 files changed: +37 -7 lines changed

ice_station_zebra/models/common/activations.py

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+from torch import nn
+
+ACTIVATION_FROM_NAME: dict[str, type[nn.Module]] = {
+    "ReLU": nn.ReLU,
+    "LeakyReLU": nn.LeakyReLU,
+    "ELU": nn.ELU,
+    "GELU": nn.GELU,
+    "SiLU": nn.SiLU,
+    "Sigmoid": nn.Sigmoid,
+    "Tanh": nn.Tanh,
+}
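The last commit bullet above is the key design point: the dictionary maps a name to the activation class itself, so the class can still be instantiated with whatever arguments are needed. A minimal sketch of that lookup pattern, assuming the new file is importable as ice_station_zebra.models.common.activations (a path inferred from the relative imports in the block modules below):

from torch import nn

# Assumed import path, inferred from the relative imports in the block modules.
from ice_station_zebra.models.common.activations import ACTIVATION_FROM_NAME

# Look up the class by name, then instantiate it with arguments.
# Passing inplace=True reproduces the previous hard-coded nn.ReLU(inplace=True).
activation_layer = ACTIVATION_FROM_NAME["ReLU"]
layer = activation_layer(inplace=True)
assert isinstance(layer, nn.ReLU)

# Other activations can receive their own constructor arguments,
# e.g. the slope of LeakyReLU.
leaky = ACTIVATION_FROM_NAME["LeakyReLU"](negative_slope=0.1, inplace=True)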

ice_station_zebra/models/common/bottleneckblock.py

Lines changed: 7 additions & 2 deletions

@@ -1,5 +1,7 @@
 from torch import Tensor, nn
 
+from .activations import ACTIVATION_FROM_NAME
+
 
 class BottleneckBlock(nn.Module):
     def __init__(
@@ -8,19 +10,22 @@ def __init__(
         out_channels: int,
         *,
         filter_size: int,
+        activation: str = "ReLU",
     ) -> None:
         """Initialise a BottleneckBlock."""
         super().__init__()
 
+        activation_layer = ACTIVATION_FROM_NAME[activation]
+
         self.model = nn.Sequential(
             nn.Conv2d(
                 in_channels, out_channels, kernel_size=filter_size, padding="same"
             ),
-            nn.ReLU(inplace=True),
+            activation_layer(inplace=True),
             nn.Conv2d(
                 out_channels, out_channels, kernel_size=filter_size, padding="same"
             ),
-            nn.ReLU(inplace=True),
+            activation_layer(inplace=True),
             nn.BatchNorm2d(num_features=out_channels),
         )
 
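A hedged usage sketch for the updated BottleneckBlock. The import path, tensor sizes, and the assumption that the block's forward (not shown in this diff) simply applies self.model are illustrative, not taken from the repository:

import torch

# Assumed import path for the block shown above.
from ice_station_zebra.models.common.bottleneckblock import BottleneckBlock

# Omitting the argument keeps the previous behaviour (ReLU); a string picks another activation.
default_block = BottleneckBlock(16, 32, filter_size=3)
elu_block = BottleneckBlock(16, 32, filter_size=3, activation="ELU")

x = torch.randn(1, 16, 64, 64)  # illustrative batch of 64x64 feature maps
y = elu_block(x)
print(y.shape)  # padding="same" preserves spatial size: torch.Size([1, 32, 64, 64])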

ice_station_zebra/models/common/convblock.py

Lines changed: 8 additions & 3 deletions

@@ -1,5 +1,7 @@
 from torch import Tensor, nn
 
+from .activations import ACTIVATION_FROM_NAME
+
 
 class ConvBlock(nn.Module):
     def __init__(
@@ -9,19 +11,22 @@ def __init__(
         *,
         filter_size: int,
         final: bool = False,
+        activation: str = "ReLU",
     ) -> None:
         """Initialise a ConvBlock."""
         super().__init__()
 
+        activation_layer = ACTIVATION_FROM_NAME[activation]
+
         layers = [
             nn.Conv2d(
                 in_channels, out_channels, kernel_size=filter_size, padding="same"
             ),
-            nn.ReLU(inplace=True),
+            activation_layer(inplace=True),
             nn.Conv2d(
                 out_channels, out_channels, kernel_size=filter_size, padding="same"
             ),
-            nn.ReLU(inplace=True),
+            activation_layer(inplace=True),
         ]
         if final:
             layers += [
@@ -31,7 +36,7 @@ def __init__(
                     kernel_size=filter_size,
                     padding="same",
                 ),
-                nn.ReLU(inplace=True),
+                activation_layer(inplace=True),
             ]
 
         else:
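A short sketch of how the new argument behaves on ConvBlock: the default reproduces the old ReLU behaviour, any key from ACTIVATION_FROM_NAME is accepted, and an unknown name fails at construction time because the lookup is a plain dict access. The import path is an assumption:

# Assumed import path for the block shown above.
from ice_station_zebra.models.common.convblock import ConvBlock

# Omitting the argument keeps the previous behaviour (ReLU after each convolution).
default_block = ConvBlock(8, 16, filter_size=3)

# Any key from ACTIVATION_FROM_NAME can be passed; the same activation is reused
# after every convolution, including the extra one added when final=True.
leaky_block = ConvBlock(8, 16, filter_size=3, final=True, activation="LeakyReLU")

# A name that is not in the dictionary fails at construction time with a KeyError.
try:
    ConvBlock(8, 16, filter_size=3, activation="Swish")
except KeyError as err:
    print(f"Unknown activation: {err}")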

ice_station_zebra/models/common/upconvblock.py

Lines changed: 11 additions & 2 deletions

@@ -1,15 +1,24 @@
 from torch import Tensor, nn
 
+from .activations import ACTIVATION_FROM_NAME
+
 
 class UpconvBlock(nn.Module):
-    def __init__(self, in_channels: int, out_channels: int) -> None:
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        activation: str = "ReLU",
+    ) -> None:
         """Initialise an UpconvBlock."""
         super().__init__()
 
+        activation_layer = ACTIVATION_FROM_NAME[activation]
+
         self.model = nn.Sequential(
             nn.Upsample(scale_factor=2, mode="nearest"),
             nn.Conv2d(in_channels, out_channels, kernel_size=2, padding="same"),
-            nn.ReLU(inplace=True),
+            activation_layer(inplace=True),
         )
 
     def forward(self, x: Tensor) -> Tensor:
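A hedged sketch for UpconvBlock, assuming the same import convention as above and that forward (whose body is truncated here) applies self.model; sizes are illustrative:

import torch

# Assumed import path for the block shown above.
from ice_station_zebra.models.common.upconvblock import UpconvBlock

# Existing call sites keep working (activation defaults to "ReLU");
# new code can select a different activation by name.
up = UpconvBlock(32, 16, activation="SiLU")

x = torch.randn(1, 32, 8, 8)  # illustrative input
y = up(x)
# nn.Upsample(scale_factor=2) doubles the spatial size, the "same"-padded
# conv keeps it: torch.Size([1, 16, 16, 16])
print(y.shape)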
