#  Copyright (c) Meta Platforms, Inc. and affiliates.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
"""
A common place for holding AIT-related env control variables
"""

import logging
import os
from typing import Optional


_LOGGER = logging.getLogger(__name__)


def get_compiler_opt_level() -> str:
    # In our setup, the --optimize <level> (-Ox) option applies to the
    # HOST compiler, where -O3 buys essentially nothing except a much
    # longer compilation time.
    #
    # It is therefore safe to let users override this option to
    # significantly speed up compilation and testing, especially for
    # very large models.
    compiler_opt = os.getenv("AIT_COMPILER_OPT", "-O3")

    return compiler_opt

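# A minimal usage sketch (hypothetical caller, not part of this module):
# lowering the host-compiler optimization level to speed up builds.
#
#     import os
#     os.environ["AIT_COMPILER_OPT"] = "-O1"  # faster host compilation
#     from aitemplate.utils import environ
#     assert environ.get_compiler_opt_level() == "-O1"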


def use_fast_math() -> bool:
    """
    Whether the fast-math option should be used for device code generation.
    Fast math substitutes approximate math operations (e.g., for division),
    trading accuracy for speed.
    The environment variable AIT_USE_FAST_MATH defaults to "1".
    """
    return os.getenv("AIT_USE_FAST_MATH", "1") == "1"

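# Sketch of how a code generator might consume this toggle. Mapping it to
# nvcc's --use_fast_math flag is an assumption here, not necessarily what
# the AIT backends emit:
#
#     def nvcc_fast_math_flags() -> list:
#         # hypothetical helper
#         return ["--use_fast_math"] if use_fast_math() else []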


def use_tanh_for_sigmoid() -> bool:
    """
    Whether to approximate sigmoid with tanh in the generated device code.
    This controls code generation for both AITemplate and CUTLASS.
    The environment variable AIT_USE_TANH_FOR_SIGMOID defaults to "0".
    """
    return os.getenv("AIT_USE_TANH_FOR_SIGMOID", "0") == "1"


def enable_cuda_lto() -> bool:
    """
    Whether nvcc should compile with LTO flags.
    Default value is "0".
    """
    return os.getenv("AIT_ENABLE_CUDA_LTO", "0") == "1"


def nvcc_ccbin() -> str:
    """
    Host compiler for nvcc to use (passed via -ccbin).
    Defaults to "" (nvcc's default host compiler).
    """
    return os.getenv("AIT_NVCC_CCBIN", "")


def force_profiler_cache() -> bool:
    """
    Force the profiler to use the cached results. The profiler will throw
    a runtime exception if it cannot find cached results. This env may be
    useful to capture any cache misses due to cache version updates or
    other relevant code changes.
    """
    force_cache = os.environ.get("AIT_FORCE_PROFILER_CACHE", None) == "1"
    if force_cache:
        assert (
            os.environ.get("FORCE_PROFILE", None) != "1"
        ), "cannot specify both AIT_FORCE_PROFILER_CACHE and FORCE_PROFILE"
    return force_cache

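# Behavior sketch: AIT_FORCE_PROFILER_CACHE and FORCE_PROFILE are mutually
# exclusive, so setting both trips the assertion above.
#
#     import os
#     os.environ["AIT_FORCE_PROFILER_CACHE"] = "1"
#     os.environ.pop("FORCE_PROFILE", None)
#     assert force_profiler_cache() is True
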
def time_compilation() -> bool:
    """
    When enabled, time each make command at compilation time. This helps
    with compilation-time analysis. Requires the "time" utility to be
    installed.
    """
    return os.getenv("AIT_TIME_COMPILATION", "0") == "1"

def shorten_tensor_names_for_plots() -> bool:
    """
    When enabled, long tensor names will be replaced with a hash string,
    making the graph representation significantly simpler.
    """
    return os.getenv("AIT_PLOT_SHORTEN_TENSOR_NAMES", "0") == "1"

def ait_build_cache_dir() -> Optional[str]:
    """
    When set to a non-empty string, cache the build artifacts below this
    directory for significantly faster builds.

    See aitemplate.backend.build_cache

    Returns:
        Optional[str]: Value of the AIT_BUILD_CACHE_DIR environment
        variable, or None if not set.
    """
    return os.environ.get("AIT_BUILD_CACHE_DIR", None)


def ait_build_cache_skip_percentage() -> int:
    """
    When set to a non-empty string, and if AIT_BUILD_CACHE_DIR is set,
    the build cache will be skipped randomly with a probability
    corresponding to the specified percentage.

    Returns:
        int: Integer value of the AIT_BUILD_CACHE_SKIP_PERCENTAGE
        environment variable, or 30 if not set.
    """
    return int(os.environ.get("AIT_BUILD_CACHE_SKIP_PERCENTAGE", "30"))


def ait_build_cache_skip_profiler() -> bool:
    """
    Boolean value of the AIT_BUILD_CACHE_SKIP_PROFILER environment
    variable. Returns False if the variable is set to "0", an empty
    string, or "false" (case insensitive); returns True in all other
    cases, including when the variable is not set (it defaults to "1").
    """
    ret = os.environ.get("AIT_BUILD_CACHE_SKIP_PROFILER", "1")
    if ret is None or ret == "" or ret == "0" or ret.lower() == "false":
        return False
    return True


def ait_build_cache_max_mb() -> int:
    """
    Integer value of the AIT_BUILD_CACHE_MAX_MB environment variable.
    This determines the maximum size, in MB, of the artifact data to be
    cached. For larger (raw, uncompressed) data the build cache will be
    skipped. Defaults to 30.
    """
    return int(os.environ.get("AIT_BUILD_CACHE_MAX_MB", "30"))


def allow_cutlass_sm90_kernels() -> bool:
    """
    Whether the SM90 CUTLASS kernels should be considered alongside the
    SM80 CUTLASS kernels on CUDA arch 90 (for the CUDA back-end of the
    GEMM ops). Default: False.
    """
    return (
        force_cutlass_sm90_kernels()
        or os.getenv("AIT_ALLOW_CUTLASS_SM90_KERNELS", "0") == "1"
    )


def force_cutlass_sm90_kernels() -> bool:
    """
    Whether only the SM90 CUTLASS kernels (and not the SM80 ones) should
    be considered on CUDA arch 90 (for the CUDA back-end of the GEMM
    ops). Default: False.
    """
    return os.getenv("AIT_FORCE_CUTLASS_SM90_KERNELS", "0") == "1"

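# Sketch of how a build-cache caller might apply the skip percentage; the
# helper should_skip_build_cache is hypothetical, not the actual logic in
# aitemplate.backend.build_cache:
#
#     import random
#
#     def should_skip_build_cache() -> bool:
#         # hypothetical: roll a 0-99 die against the configured percentage
#         if ait_build_cache_dir() is None:
#             return True  # no cache dir configured, nothing to cache
#         return random.randrange(100) < ait_build_cache_skip_percentage()
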
def multistream_mode() -> int:
    """
    Multi-stream mode. 0 - no multistream. 1 - simple multistream.
    Default: 0.
    """
    return int(os.getenv("AIT_MULTISTREAM_MODE", "0"))

def multistream_additional_streams() -> int:
    """
    Number of extra streams in multi-stream mode. This option is
    independent from AIT_MULTISTREAM_MAX_MEM_PARALLEL_OPS.

    For example, say there are 100 ops that can be run in parallel.

    Example 1: AIT_MULTISTREAM_EXTRA_STREAMS=4 and
    AIT_MULTISTREAM_MAX_MEM_PARALLEL_OPS=100. In this case 5 streams
    will be used (1 base and 4 extra), every stream gets 20 operators,
    and no inter-stream barriers are used. Memory planning is done for
    100 parallel ops.

    Example 2: AIT_MULTISTREAM_EXTRA_STREAMS=4 and
    AIT_MULTISTREAM_MAX_MEM_PARALLEL_OPS=5. In this case 5 streams will
    be used (1 base and 4 extra), there will be 20 waves separated by
    inter-stream barriers, and every stream gets 1 operator for every
    wave. Memory planning is done for 20 waves of 5 parallel ops each.
    """
    return int(os.getenv("AIT_MULTISTREAM_EXTRA_STREAMS", "4"))

def multistream_max_mem_parallel_ops() -> int:
    """
    Maximum number of parallel operators used in memory planning for
    simple multi-stream mode. Larger values imply a higher level of
    possible parallelism, but also larger memory allocations. This
    option is independent from AIT_MULTISTREAM_EXTRA_STREAMS.

    For example, say there are 100 ops that can be run in parallel.

    Example 1: AIT_MULTISTREAM_EXTRA_STREAMS=4 and
    AIT_MULTISTREAM_MAX_MEM_PARALLEL_OPS=100. In this case 5 streams
    will be used (1 base and 4 extra), every stream gets 20 operators,
    and no inter-stream barriers are used. Memory planning is done for
    100 parallel ops.

    Example 2: AIT_MULTISTREAM_EXTRA_STREAMS=4 and
    AIT_MULTISTREAM_MAX_MEM_PARALLEL_OPS=5. In this case 5 streams will
    be used (1 base and 4 extra), there will be 20 waves separated by
    inter-stream barriers, and every stream gets 1 operator for every
    wave. Memory planning is done for 20 waves of 5 parallel ops each.
    """
    # unlimited by default
    return int(os.getenv("AIT_MULTISTREAM_MAX_MEM_PARALLEL_OPS", "99999999"))

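# Numeric sketch of how the two multi-stream knobs interact (hypothetical
# helper, mirroring the examples in the docstrings above):
#
#     import math
#
#     def multistream_plan(parallel_ops: int):
#         streams = 1 + multistream_additional_streams()
#         per_wave = min(parallel_ops, multistream_max_mem_parallel_ops())
#         waves = math.ceil(parallel_ops / per_wave)
#         # e.g. (5, 1) for Example 1, (5, 20) for Example 2
#         return streams, waves
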
def is_cmake_compilation() -> bool:
    """
    When enabled, compiles the model by invoking CMake rather than
    invoking make directly.
    """
    # todo: replace with more builders?
    return os.getenv("AIT_USE_CMAKE_COMPILATION", "0") == "1"

def enable_standalone_exe_generation() -> bool:
    """
    Whether to generate standalone binaries for AIT build directories.
    Defaults to False.
    """
    return os.getenv("AIT_ENABLE_STANDALONE", "0") == "1"


def enable_ptxas_info() -> bool:
    """
    Whether to keep the intermediate output files generated by nvcc
    (including ptxas assembly) and emit verbose ptxas generation logs.
    Defaults to False.
    """
    return os.getenv("AIT_ENABLE_PTXAS_INFO", "0") == "1"


def enable_include_from_sourcetree() -> bool:
    """
    Whether to include header files from the source tree when building an
    AIT model, instead of placing them in a temp dir. Defaults to False.
    Only works with the FBCUDA target when doing development / in-place
    builds.
    """
    return os.getenv("AIT_ENABLE_INCLUDE_FROM_SOURCETREE", "0") == "1"


def get_cuda_nvcc_debug_level() -> str:
    """
    Return the level of CUDA debug information, backed by the env var
    AIT_CUDA_DEBUG_LEVEL. Defaults to no debug info. The value may either
    be a string that is passed through verbatim to nvcc on the command
    line, or an integer (as a string) from 0 to 2 with the following
    meaning:

    - 0: No debug info (default).
    - 1: Line information. Good for stack traces and profiling.
      Optimizations can remain enabled.
    - 2: Full debug information. WARNING: level 2 disables all compiler
      optimizations, regardless of what else is passed as the
      optimization level.
    """
    level = os.getenv("AIT_CUDA_DEBUG_LEVEL", "0")
    return level


def get_cutlass_debug_trace_level() -> str:
    """
    Return the level of CUTLASS lib debug trace information.
    Defaults to no debug info.
    """
    level = os.getenv("CUTLASS_DEBUG_TRACE_LEVEL", "0")
    return level


def enable_cuda_source_navigation_fix() -> bool:
    """
    When this flag is enabled, the FBCUDA target will copy every *.cu
    file in build dirs into a corresponding *.cu.h file and create a *.cu
    file which just #include's this file. This fixes code navigation
    issues in some IDEs which don't treat .cu files as C++ files and
    disable code navigation.
    """
    return os.getenv("AIT_ENABLE_CUDA_SOURCE_NAVIGATION_FIX", "0") == "1"

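# Sketch of translating the debug level into nvcc flags; -lineinfo and -G
# are real nvcc options, but this exact mapping is an assumption rather
# than the flags AIT is guaranteed to emit:
#
#     def nvcc_debug_flags() -> list:
#         level = get_cuda_nvcc_debug_level()
#         if level in ("", "0"):
#             return []
#         if level == "1":
#             return ["-lineinfo"]  # line info only; optimizations stay on
#         if level == "2":
#             return ["-G"]  # full device debug info; disables optimizations
#         return [level]  # pass any other string through verbatim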