
Commit a593b50

Kade Lin authored and committed
Implement the task4_3 to 4_5
1 parent 4efa3dc commit a593b50

14 files changed: +2456 -64 lines

.DS_Store

8 KB
Binary file not shown.

a.txt

Lines changed: 0 additions & 8 deletions
This file was deleted.

minitorch/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+"""Public packages for minitorch"""
+
 from .testing import MathTest, MathTestVariable  # type: ignore # noqa: F401,F403
 from .datasets import *  # noqa: F401,F403
 from .optim import *  # noqa: F401,F403

minitorch/fast_conv.py

Lines changed: 71 additions & 16 deletions
@@ -1,14 +1,11 @@
 from typing import Tuple, TypeVar, Any

-import numpy as np
 from numba import prange
 from numba import njit as _njit

 from .autodiff import Context
 from .tensor import Tensor
 from .tensor_data import (
-    MAX_DIMS,
-    Index,
     Shape,
     Strides,
     Storage,
@@ -22,6 +19,26 @@


 def njit(fn: Fn, **kwargs: Any) -> Fn:
+    """Compile a Python function into a Numba-optimized function using `njit`.
+
+    This function compiles the given Python function `fn` into a Numba-optimized
+    version with just-in-time compilation. The `inline="always"` option ensures
+    that the compiled function is always inlined into the calling code, improving
+    performance by reducing function call overhead.
+
+    Parameters
+    ----------
+    fn : Fn
+        The Python function to be compiled.
+    **kwargs : Any
+        Additional arguments for the Numba `njit` compiler.
+
+    Returns
+    -------
+    Fn
+        The Numba-optimized version of the input function.
+
+    """
     return _njit(inline="always", **kwargs)(fn)  # type: ignore
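As a side note, a minimal standalone sketch of what this wrapper does, assuming only that numba is installed (the `add` function below is hypothetical and not part of the commit):

import numpy as np  # not required here, kept only to mirror the module's dependencies
from numba import njit as _njit

def add(a: float, b: float) -> float:
    # plain Python function to be compiled
    return a + b

# same pattern as the wrapper above: compile with inlining always enabled
fast_add = _njit(inline="always")(add)
print(fast_add(2.0, 3.0))  # 5.0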

@@ -98,7 +115,7 @@ def _tensor_conv1d(
                 for ic in range(in_channels):
                     for k in range(kw):
                         iw = w + k if not reverse else w - k
-                        if 0 <= iw < width:
+                        if 0 <= iw < width:
                             input_idx = b * s1[0] + ic * s1[1] + iw * s1[2]
                             weight_idx = oc * s2[0] + ic * s2[1] + k * s2[2]
                             acc += input[input_idx] * weight[weight_idx]
@@ -139,6 +156,25 @@ def forward(ctx: Context, input: Tensor, weight: Tensor) -> Tensor:

     @staticmethod
     def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, Tensor]:
+        """Compute the gradients for 1D Convolution.
+
+        Parameters
+        ----------
+        ctx : Context
+            Context object containing saved tensors from the forward pass.
+        grad_output : Tensor
+            Gradient of the loss with respect to the output of the convolution.
+
+        Returns
+        -------
+        Tuple[Tensor, Tensor]
+            A tuple containing:
+            - grad_input: Gradient of the loss with respect to the input tensor.
+              Shape: [batch, in_channels, width]
+            - grad_weight: Gradient of the loss with respect to the weight tensor.
+              Shape: [out_channels, in_channels, kernel_width]
+
+        """
         input, weight = ctx.saved_values
         batch, in_channels, w = input.shape
         out_channels, in_channels, kw = weight.shape
@@ -215,7 +251,8 @@ def _tensor_conv2d(
         reverse (bool): anchor weight at top-left or bottom-right

     """
-    batch_, out_channels, _, _ = out_shape
+    # batch_, out_channels, _, _ = out_shape
+    batch_, out_channels, out_height, out_width = out_shape
     batch, in_channels, height, width = input_shape
     out_channels_, in_channels_, kh, kw = weight_shape

@@ -232,32 +269,30 @@ def _tensor_conv2d(
     s20, s21, s22, s23 = s2[0], s2[1], s2[2], s2[3]

     # TODO: Implement for Task 4.2.
-    o_s0, o_s1, o_s2, o_s3 = out_strides
+    s30, s31, s32, s33 = out_strides

     for b in prange(batch):
         for oc in range(out_channels):
-            for h in range(height):
-                for w in range(width):
+            for oh in range(out_height):
+                for ow in range(out_width):
                     acc = 0.0
                     for ic in range(in_channels):
                         for kh_idx in range(kh):
                             for kw_idx in range(kw):
-                                ih = h + kh_idx if not reverse else h - kh_idx
-                                iw = w + kw_idx if not reverse else w - kw_idx
+                                ih = oh + kh_idx if not reverse else oh - kh_idx
+                                iw = ow + kw_idx if not reverse else ow - kw_idx
+
                                 if 0 <= ih < height and 0 <= iw < width:
-                                    input_idx = (
-                                        b * s10 + ic * s11 + ih * s12 + iw * s13
-                                    )
+                                    input_idx = b * s10 + ic * s11 + ih * s12 + iw * s13
                                     weight_idx = (
                                         oc * s20
                                         + ic * s21
                                         + kh_idx * s22
                                         + kw_idx * s23
                                     )
                                     acc += input[input_idx] * weight[weight_idx]
-                    out_idx = (
-                        b * o_s0 + oc * o_s1 + h * o_s2 + w * o_s3
-                    )
+
+                    out_idx = b * s30 + oc * s31 + oh * s32 + ow * s33
                     out[out_idx] = acc


@@ -292,6 +327,26 @@ def forward(ctx: Context, input: Tensor, weight: Tensor) -> Tensor:

     @staticmethod
     def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, Tensor]:
+        """Compute the gradients for 2D Convolution.
+
+        Parameters
+        ----------
+        ctx : Context
+            Context object containing saved tensors from the forward pass.
+        grad_output : Tensor
+            Gradient of the loss with respect to the output of the convolution.
+            Shape: [batch, out_channels, height, width]
+
+        Returns
+        -------
+        Tuple[Tensor, Tensor]
+            A tuple containing:
+            - grad_input: Gradient of the loss with respect to the input tensor.
+              Shape: [batch, in_channels, height, width]
+            - grad_weight: Gradient of the loss with respect to the weight tensor.
+              Shape: [out_channels, in_channels, kernel_height, kernel_width]
+
+        """
         input, weight = ctx.saved_values
         batch, in_channels, h, w = input.shape
         out_channels, in_channels, kh, kw = weight.shape
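
As a cross-check on the index arithmetic in `_tensor_conv2d`, the sketch below redoes the same loop in plain NumPy on dense arrays instead of raw storage and strides. It assumes, as in this kernel, that the output keeps the input's spatial size and that out-of-range taps contribute zero; `conv2d_reference` is a name invented for illustration and is not part of the commit. The same pattern, dropped down one dimension, is what `_tensor_conv1d` computes.

import numpy as np

def conv2d_reference(inp: np.ndarray, weight: np.ndarray, reverse: bool = False) -> np.ndarray:
    """inp: [batch, in_channels, height, width]; weight: [out_channels, in_channels, kh, kw]."""
    batch, in_channels, height, width = inp.shape
    out_channels, _, kh, kw = weight.shape
    out = np.zeros((batch, out_channels, height, width))
    for b in range(batch):
        for oc in range(out_channels):
            for oh in range(height):
                for ow in range(width):
                    acc = 0.0
                    for ic in range(in_channels):
                        for ki in range(kh):
                            for kj in range(kw):
                                # forward: kernel anchored at the top-left;
                                # reverse: kernel anchored at the bottom-right
                                ih = oh + ki if not reverse else oh - ki
                                iw = ow + kj if not reverse else ow - kj
                                if 0 <= ih < height and 0 <= iw < width:
                                    acc += inp[b, ic, ih, iw] * weight[oc, ic, ki, kj]
                    out[b, oc, oh, ow] = acc
    return out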

minitorch/nn.py

Lines changed: 218 additions & 1 deletion
@@ -36,7 +36,224 @@ def tile(input: Tensor, kernel: Tuple[int, int]) -> Tuple[Tensor, int, int]:
     assert height % kh == 0
     assert width % kw == 0
     # TODO: Implement for Task 4.3.
-    raise NotImplementedError("Need to implement for Task 4.3")
+    input = input.contiguous()
+
+    new_height = height // kh
+    new_width = width // kw
+    out = input.view(batch, channel, new_height, kh, new_width, kw)
+    out = out.permute(0, 1, 2, 4, 3, 5)
+    out = out.contiguous()
+    out = out.view(batch, channel, new_height, new_width, kh * kw)
+
+    return out, new_height, new_width


 # TODO: Implement for Task 4.3.
+def avgpool2d(input: Tensor, kernel: Tuple[int, int]) -> Tensor:
+    """Apply average pooling on the 2D input tensor.
+
+    Parameters
+    ----------
+    input : Tensor
+        Input tensor with shape [batch, channels, height, width].
+    kernel : Tuple[int, int]
+        Tuple specifying the height and width of the pooling kernel.
+
+    Returns
+    -------
+    Tensor
+        Tensor after applying average pooling, with reduced height and width
+        depending on the kernel size.
+        Shape: [batch, channels, new_height, new_width]
+
+    """
+    tiled, new_height, new_width = tile(input, kernel)
+    pooled = tiled.mean(dim=4)
+    return pooled.view(input.shape[0], input.shape[1], new_height, new_width)
+
+
+class Max(Function):
+    @staticmethod
+    def forward(ctx: Context, t: Tensor, dim: Tensor) -> Tensor:
+        """Compute the maximum values along a specified dimension.
+
+        Parameters
+        ----------
+        ctx : Context
+            Context object for storing intermediate values for the backward pass.
+        t : Tensor
+            Input tensor.
+        dim : Tensor
+            Dimension along which to compute the maximum.
+
+        Returns
+        -------
+        Tensor
+            A tensor containing the maximum values along the specified dimension.
+
+        """
+        d = int(dim.item())
+        res = FastOps.reduce(operators.max, start=-1e30)(t, d)
+        ctx.save_for_backward(t, dim, res)
+        return res
+
+    @staticmethod
+    def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, float]:
+        """Compute the gradient of the max operation.
+
+        Parameters
+        ----------
+        ctx : Context
+            Context object containing saved values from the forward pass.
+        grad_output : Tensor
+            Gradient of the loss with respect to the output of the max operation.
+
+        Returns
+        -------
+        Tuple[Tensor, float]
+            - Gradient of the loss with respect to the input tensor.
+            - A float representing the gradient with respect to the dimension, which is always 0.
+
+        """
+        t, dim, max_val = ctx.saved_values
+        d = int(dim.item())
+        mask = t == max_val
+        sum_mask = mask.sum(dim=d)
+        grad_input = mask * (grad_output / sum_mask)
+        return grad_input, 0.0
+
+
+def max(t: Tensor, dim: int) -> Tensor:
+    """Apply the max function along a specified dimension.
+
+    Parameters
+    ----------
+    t : Tensor
+        Input tensor.
+    dim : int
+        Dimension along which to compute the maximum.
+
+    Returns
+    -------
+    Tensor
+        Tensor containing the maximum values along the specified dimension.
+
+    """
+    return Max.apply(t, tensor(dim))
+
+
+def argmax(t: Tensor, dim: int) -> Tensor:
+    """Compute the indices of the maximum values along a specified dimension.
+
+    Parameters
+    ----------
+    t : Tensor
+        Input tensor.
+    dim : int
+        Dimension along which to compute the indices of the maximum.
+
+    Returns
+    -------
+    Tensor
+        Tensor containing one-hot encoded indices of the maximum values along the specified dimension.
+
+    """
+    m = max(t, dim)
+    expand_shape = list(m.shape)
+    expand_shape.insert(dim, t.shape[dim])
+    mask = t == m
+    return mask
+
+
+def softmax(t: Tensor, dim: int) -> Tensor:
+    """Compute the softmax along a specified dimension.
+
+    Parameters
+    ----------
+    t : Tensor
+        Input tensor.
+    dim : int
+        Dimension along which to compute the softmax.
+
+    Returns
+    -------
+    Tensor
+        Tensor containing the softmax probabilities along the specified dimension.
+
+    """
+    exp_t = t.exp()
+    sum_exp = exp_t.sum(dim=dim)
+    return exp_t / sum_exp
+
+
+def logsoftmax(t: Tensor, dim: int) -> Tensor:
+    """Compute the log of the softmax along a specified dimension.
+
+    Parameters
+    ----------
+    t : Tensor
+        Input tensor.
+    dim : int
+        Dimension along which to compute the logsoftmax.
+
+    Returns
+    -------
+    Tensor
+        Tensor containing the log of the softmax probabilities along the specified dimension.
+
+    """
+    m = max(t, dim=dim)
+    log_sum_exp = ((t - m).exp().sum(dim=dim)).log() + m
+    return t - log_sum_exp
+
+
+def maxpool2d(input: Tensor, kernel: Tuple[int, int]) -> Tensor:
+    """Apply max pooling on the 2D input tensor.
+
+    Parameters
+    ----------
+    input : Tensor
+        Input tensor with shape [batch, channels, height, width].
+    kernel : Tuple[int, int]
+        Tuple specifying the height and width of the pooling kernel.
+
+    Returns
+    -------
+    Tensor
+        Tensor after applying max pooling, with reduced height and width
+        depending on the kernel size.
+        Shape: [batch, channels, new_height, new_width]
+
+    """
+    tiled, new_height, new_width = tile(input, kernel)
+    pooled = max(tiled, dim=4)
+    return pooled.view(input.shape[0], input.shape[1], new_height, new_width)
+
+
+def dropout(input: Tensor, p: float = 0.5, ignore: bool = False) -> Tensor:
+    """Apply dropout regularization to the input tensor.
+
+    Parameters
+    ----------
+    input : Tensor
+        Input tensor.
+    p : float, optional
+        Dropout probability (default is 0.5).
+    ignore : bool, optional
+        If True, bypass dropout (default is False).
+
+    Returns
+    -------
+    Tensor
+        Tensor with randomly zeroed elements scaled by 1 / (1 - p) to maintain expected value.
+
+    """
+    if p == 1.0:
+        if not ignore:
+            return input.zeros(input.shape)
+        else:
+            return input
+    if ignore:
+        return input
+    mask = rand(input.shape, backend=input.backend) > p
+    return input * mask * (1.0 / (1 - p))
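
As an aside on the `tile` trick above: the view, permute, view sequence turns every kh x kw window into the trailing axis, so pooling becomes a single reduction over that axis. A NumPy illustration of the same idea (illustration only, not part of the commit; shapes are arbitrary):

import numpy as np

x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)   # [batch, channel, height, width]
kh, kw = 2, 2
b, c, h, w = x.shape

# reshape -> transpose -> reshape, as in tile(): each kh x kw window becomes the last axis
tiled = (
    x.reshape(b, c, h // kh, kh, w // kw, kw)
     .transpose(0, 1, 2, 4, 3, 5)
     .reshape(b, c, h // kh, w // kw, kh * kw)
)

print(tiled.mean(axis=-1))  # average pooling, as in avgpool2d
print(tiled.max(axis=-1))   # max pooling, as in maxpool2d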

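The `Max.backward` rule routes the incoming gradient to the positions that attained the maximum and splits it evenly across ties, which is what the mask / sum_mask arithmetic above does. A minimal NumPy sketch of that rule (illustration only):

import numpy as np

t = np.array([1.0, 3.0, 3.0, 2.0])
grad_output = 1.0                       # gradient flowing into max(t)

mask = (t == t.max()).astype(float)     # 1.0 where t attains the maximum
grad_input = mask * (grad_output / mask.sum())
print(grad_input)                       # [0.  0.5 0.5 0. ]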
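The `logsoftmax` implementation relies on the shift identity log(sum(exp(t))) = m + log(sum(exp(t - m))) with m = max(t), which keeps the exponentials from overflowing even where a naive softmax would not be safe. A quick NumPy check (illustration only):

import numpy as np

t = np.array([1000.0, 1001.0, 1002.0])          # naive exp(t) overflows to inf
m = t.max()
log_sum_exp = np.log(np.exp(t - m).sum()) + m   # stable, as in logsoftmax above
print(t - log_sum_exp)                          # roughly [-2.41, -1.41, -0.41]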
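Finally, the `dropout` above is the inverted variant: surviving elements are scaled by 1 / (1 - p) so that the expected value of each element is unchanged at training time. A NumPy sketch of the same idea (illustration only; the seed and shapes are arbitrary):

import numpy as np

rng = np.random.default_rng(0)
x = np.ones((1000, 100))
p = 0.5

mask = rng.random(x.shape) > p          # keep each element with probability 1 - p
y = x * mask / (1.0 - p)                # rescale the survivors

print(x.mean(), round(y.mean(), 3))     # both close to 1.0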