# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
conv3d Module.
"""
from aitemplate.compiler.ops import conv3d, conv3d_bias, depthwise_conv3d
from aitemplate.compiler.ops.padding.ndhwc3to8 import ndhwc3to8
from aitemplate.frontend.nn.module import Module
from aitemplate.frontend.nn.parameter import Parameter

# pylint: disable=C0103
class Conv3d(Module):
r"""Applies a 3D convolution over an input signal composed of several input
planes.
* :attr:`stride` controls the stride for the cross-correlation.
* :attr:`padding` controls the amount of padding applied to the input.
* :attr:`dilation` controls the spacing between the kernel points; also
known as the à trous algorithm. It is harder to describe, but this `link`_
has a nice visualization of what :attr:`dilation` does.
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or Tuple(int)): Size of the convolving kernel
stride (int or Tuple(int)): Stride of the convolution
padding (int or Tuple(int), optional): Padding added to all four sides of
the input. Default: 0
dilation (int or Tuple(int), optional): Spacing between kernel elements. Default: 1
groups (int, optional): Number of blocked connections from input
channels to output channels. Default: 1
dtype (string, optional): Data type. Default: "float16"
bias (bool, optional): Has bias or not. Default: False (Note that we only support bias for depthwise_conv3d for now)
Shape:
- Input: :math:`(N, D_{in}, H_{in}, W_{in}, C_{in})`
- Output: :math:`(N, D_{out}, H_{out}, W_{out}, C_{out})`, where
.. math::
D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding} - \text{dilation}
\times (\text{kernel_size} - 1) - 1}{\text{stride}} + 1\right\rfloor
.. math::
H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding} - \text{dilation}
\times (\text{kernel_size} - 1) - 1}{\text{stride}} + 1\right\rfloor
.. math::
W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding} - \text{dilation}
\times (\text{kernel_size} - 1) - 1}{\text{stride}} + 1\right\rfloor
Attributes:
weight (Tensor): the learnable weights of the module of shape
:math:`(\text{out_channels}, \text{kernel_size}[0], \text{kernel_size}[1], \text{kernel_size}[2], `
:math:`\frac{\text{in_channels}}{\text{groups}})`.
Examples::
>>> m = nn.Conv3d(16, 33, 3, 2)
>>> input = Tensor(shape=[20, 50, 100, 100, 16])
>>> output = m(input)
.. _cross-correlation:
https://en.wikipedia.org/wiki/Cross-correlation
.. _link:
https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
"""
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding=0,
        dilation=1,
        groups=1,
        dtype="float16",
        bias=False,
    ):
        super().__init__()
        self.has_bias = bias
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size, kernel_size)
        # Weights are stored in NDHWC layout:
        # [out_channels, k_d, k_h, k_w, in_channels // groups].
        self.weight = Parameter(
            shape=[out_channels, *kernel_size, in_channels // groups],
            dtype=dtype,
        )
        if self.has_bias:
            self.bias = Parameter(shape=[out_channels], dtype=dtype)
        # Dispatch to the backend op: conv3d / conv3d_bias for groups == 1,
        # depthwise_conv3d otherwise.
        if groups == 1:
            if self.has_bias:
                self.op = conv3d_bias(
                    stride=stride, pad=padding, dilate=dilation, group=groups
                )
            else:
                self.op = conv3d(
                    stride=stride, pad=padding, dilate=dilation, group=groups
                )
        else:
            self.op = depthwise_conv3d(
                stride=stride, pad=padding, dilate=dilation, group=groups, bias=bias
            )
    def forward(self, *args):
        """Applies Conv3d on the input tensor."""
        assert len(args) == 1
        x = args[0]
        if self.has_bias:
            # ndhwc3to8 zero-pads the channel dimension of an NDHWC tensor
            # from 3 to 8 so the kernel can use aligned, vectorized access.
            x = ndhwc3to8()(x)
            weight = ndhwc3to8()(self.weight.tensor())
            return self.op(x, weight, self.bias.tensor())
        else:
            return self.op(x, self.weight.tensor())
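

if __name__ == "__main__":
    # Minimal usage sketch, not part of the module API: builds the graph for
    # the docstring's example. Assumes the AITemplate frontend Tensor; the
    # tensor name "x" is hypothetical. Compile with compile_model to run.
    from aitemplate.frontend import Tensor

    m = Conv3d(16, 33, kernel_size=3, stride=2)
    x = Tensor(shape=[20, 50, 100, 100, 16], name="x", is_input=True)
    y = m(x)  # expected output shape: (20, 24, 49, 49, 33)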