Source code for aitemplate.compiler.ops.gemm_special.bmm_rrr_k1_tanh

#  Copyright (c) Meta Platforms, Inc. and affiliates.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
"""
Operator definition for bmm_rrr_k1_tanh.
"""
from typing import List

from aitemplate.compiler.base import IntVar, Tensor
from aitemplate.compiler.ops.gemm_universal import bmm_rrr

# pylint: disable=C0103,W0221,C0200


class bmm_rrr_k1_tanh(bmm_rrr):
    """Operator definition for ``bmm_rrr_k1_tanh``, a subclass of ``bmm_rrr``.

    The class tags itself with the op name ``"bmm_rrr_k1_tanh"``, validates
    the shapes of its two 3-D operands and produces a single output tensor of
    shape ``[batch, M, N]``. It opts out of profiling.

    NOTE(review): the name suggests a K == 1 batched matmul with a fused tanh,
    but nothing in this class enforces K == 1 or applies tanh -- confirm
    against the backend kernel registered under this op name.
    """

    def __init__(self):
        """Run the parent initializer, then set the op-specific attributes."""
        super().__init__()
        self._attrs["op"] = "bmm_rrr_k1_tanh"
        self._attrs["f_ab_alignment"] = True
        # gen_profiler() below is a no-op, so advertise that no profiler exists.
        self._attrs["has_profiler"] = False

    def _infer_shapes(self, a: Tensor, b: Tensor) -> List[IntVar]:
        """Validate the operand shapes and return the output shape.

        Args:
            a: First operand; its ``shape`` attribute must have 3 entries,
                read here as ``[batch, M, K]``.
            b: Second operand; its ``shape`` attribute must have 3 entries,
                read here as ``[batch, K, N]``.

        Returns:
            The list ``[a_shape[0], a_shape[1], b_shape[2]]``.

        Raises:
            RuntimeError: If either operand is not 3-dimensional, or if the
                two batch dimensions differ.
            AssertionError: If the K dimensions differ, or if any possible
                value of M or N is not a multiple of 8.
        """
        a_shapes = a._attrs["shape"]
        if len(a_shapes) != 3:
            raise RuntimeError(
                f"bmm operand A should have 3 dimensions! Current shape: {a_shapes}."
            )

        b_shapes = b._attrs["shape"]
        if len(b_shapes) != 3:
            raise RuntimeError(
                f"bmm operand B should have 3 dimensions! Current shape: {b_shapes}."
            )

        batch_size_a = a_shapes[0]
        batch_size_b = b_shapes[0]
        if batch_size_a != batch_size_b:
            raise RuntimeError(
                "bmm operand A and B should have same batch_size! "
                f"Current shape A: {a_shapes} shape B: {b_shapes} ."
            )

        # The checks below raise AssertionError explicitly instead of using
        # `assert`, so they keep the same exception type and message but are
        # not stripped when Python runs with -O.
        if not (a_shapes[2] == b_shapes[1]):
            raise AssertionError(
                "bmm operand A and B should have same K dim! "
                f"Current shape A: {a_shapes}, shape B: {b_shapes}"
            )

        # TODO: remove shape check after fixing the kernel
        # M is validated before N's values are read, so an invalid M is always
        # reported first.
        for label, dim in (("M", a_shapes[1]), ("N", b_shapes[2])):
            dim_values = dim._attrs["values"]
            if not all(val % 8 == 0 for val in dim_values):
                raise AssertionError(
                    f"{label} should be multiples of 8. {label}: {dim}"
                )

        return [batch_size_a, a_shapes[1], b_shapes[2]]

    def __call__(self, a: Tensor, b: Tensor) -> Tensor:
        """Attach the op to its inputs and build the output tensor.

        Args:
            a: First operand.
            b: Second operand.

        Returns:
            A single new ``Tensor`` (not a list) whose shape comes from
            ``_infer_shapes``, whose ``src_ops`` is ``{self}`` and whose dtype
            is ``a.dtype()``.

        Raises:
            RuntimeError, AssertionError: Propagated from ``_infer_shapes``
                when the operand shapes are rejected.
        """
        self._attrs["inputs"] = [a, b]
        # Inputs are recorded before _set_depth() is called, as in the
        # original statement order.
        self._set_depth()
        output_shape = self._infer_shapes(a, b)
        output = Tensor(output_shape, src_ops={self}, dtype=a.dtype())
        self._attrs["outputs"] = [output]
        return output

    def gen_profiler(
        self, workdir: str = None, dynamic_profiling_strategy=None
    ) -> None:
        """This kernel does not require profiling.

        Both arguments are accepted and ignored; the method always returns
        ``None``.
        """
        return