diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..ace14d14f2 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,101 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +cmake_minimum_required(VERSION 3.19) +project(torchao_core CUDA CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +# Find PyTorch package +find_package(Torch REQUIRED) + +# Global compile definitions +add_compile_definitions(Py_LIMITED_API=0x03090000) + +# Set compiler flags based on platform and build type +if(MSVC) + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options(/Od /ZI /DEBUG) + else() + add_compile_options(/O2 /permissive-) + endif() +else() + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options(-g -O0) + if(CMAKE_CUDA_COMPILER) + add_compile_options($<$:-g>) + endif() + else() + add_compile_options(-O3) + if(CMAKE_CUDA_COMPILER) + add_compile_options($<$:-O3>) + endif() + endif() + + # Add color diagnostics for non-Windows builds + add_compile_options(-fdiagnostics-color=always) +endif() + +# Include directories +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +# CUDA Setup +if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + add_definitions(-DTORCH_CUDA_AVAILABLE) + + # Set CUDA architectures if not already set + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70 75 80 86) + endif() + + # Set CUDA flags + # Set CUDA architectures and TORCH_CUDA_ARCH_LIST + if(NOT DEFINED TORCH_CUDA_ARCH_LIST) + set(TORCH_CUDA_ARCH_LIST "7.0;7.5;8.0;8.6;9.0") + endif() + + # CUTLASS support for non-Windows CUDA builds + if(NOT WIN32) + set(CUTLASS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/cutlass) + if(EXISTS ${CUTLASS_DIR}) + add_definitions(-DTORCHAO_USE_CUTLASS) + include_directories(${CUTLASS_DIR}/include) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DTORCHAO_USE_CUTLASS -I${CUTLASS_DIR}/include") + endif() + endif() +endif() + +# Find source files +file(GLOB_RECURSE CPP_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/torchao/csrc/**/*.cpp") +if(CMAKE_CUDA_COMPILER) + file(GLOB_RECURSE CUDA_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/torchao/csrc/cuda/**/*.cu") +endif() + +# Create the core library +add_library(torchao_core SHARED + ${CPP_SOURCES} + ${CUDA_SOURCES} +) + +target_link_libraries(torchao_core PRIVATE + ${TORCH_LIBRARIES} +) + +# Set Python limited API version +target_compile_definitions(torchao_core PRIVATE Py_LIMITED_API=0x03090000) + +# Installation +install(TARGETS torchao_core + LIBRARY DESTINATION lib + RUNTIME DESTINATION lib +) diff --git a/setup.py b/setup.py index 8628dc7ef4..3916da7965 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,4 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - +import platform import glob import os import subprocess @@ -10,13 +6,14 @@ import time from datetime import datetime +import torch +from torch.utils.cpp_extension import BuildExtension, CUDA_HOME, IS_WINDOWS from setuptools import Extension, find_packages, setup current_date = datetime.now().strftime("%Y%m%d") PY3_9_HEXCODE = "0x03090000" - def get_git_commit_id(): try: return ( @@ -27,77 +24,41 @@ def get_git_commit_id(): except Exception: return "" - def read_requirements(file_path): with open(file_path, "r") as file: return file.read().splitlines() - def read_version(file_path="version.txt"): with open(file_path, "r") as file: return file.readline().strip() - # Use Git commit ID if VERSION_SUFFIX is not set version_suffix = os.getenv("VERSION_SUFFIX") if version_suffix is None: version_suffix = f"+git{get_git_commit_id()}" -use_cpp = os.getenv("USE_CPP") - -import platform - -build_torchao_experimental = ( - use_cpp == "1" - and platform.machine().startswith("arm64") - and platform.system() == "Darwin" -) - version_prefix = read_version() -# Version is version.dev year month date if using nightlies and version if not version = ( f"{version_prefix}.dev{current_date}" if os.environ.get("TORCHAO_NIGHTLY") else version_prefix ) - -def use_debug_mode(): - return os.getenv("DEBUG", "0") == "1" - - -import torch -from torch.utils.cpp_extension import ( - CUDA_HOME, - IS_WINDOWS, - BuildExtension, - CppExtension, - CUDAExtension, -) - -# Constant known variables used throughout this file -cwd = os.path.abspath(os.path.curdir) -third_party_path = os.path.join(cwd, "third_party") - - def get_submodule_folders(): - git_modules_path = os.path.join(cwd, ".gitmodules") + git_modules_path = os.path.join(os.path.abspath(os.path.curdir), ".gitmodules") default_modules_path = [ - os.path.join(third_party_path, name) - for name in [ - "cutlass", - ] + os.path.join("third_party", name) + for name in ["cutlass"] ] if not os.path.exists(git_modules_path): return default_modules_path with open(git_modules_path) as f: return [ - os.path.join(cwd, line.split("=", 1)[1].strip()) + os.path.join(line.split("=", 1)[1].strip()) for line in f if line.strip().startswith("path") ] - def check_submodules(): def check_for_files(folder, files): if not any(os.path.exists(os.path.join(folder, f)) for f in files): @@ -113,13 +74,12 @@ def not_exists_or_empty(folder): if bool(os.getenv("USE_SYSTEM_LIBS", False)): return folders = get_submodule_folders() - # If none of the submodule folders exists, try to initialize them if all(not_exists_or_empty(folder) for folder in folders): try: print(" --- Trying to initialize submodules") start = time.time() subprocess.check_call( - ["git", "submodule", "update", "--init", "--recursive"], cwd=cwd + ["git", "submodule", "update", "--init", "--recursive"] ) end = time.time() print(f" --- Submodule initialization took {end - start:.2f} sec") @@ -140,19 +100,16 @@ def not_exists_or_empty(folder): ], ) +class CMakeExtension(Extension): + def __init__(self, name, sourcedir=""): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) -# BuildExtension is a subclass of from setuptools.command.build_ext.build_ext class TorchAOBuildExt(BuildExtension): - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - def build_extensions(self): - cmake_extensions = [ - ext for ext in self.extensions if isinstance(ext, CMakeExtension) - ] - other_extensions = [ - ext for ext in self.extensions if not isinstance(ext, CMakeExtension) - ] + cmake_extensions = [ext for ext in self.extensions if isinstance(ext, CMakeExtension)] + other_extensions = [ext for ext in self.extensions if not isinstance(ext, CMakeExtension)] + for ext in cmake_extensions: self.build_cmake(ext) @@ -160,134 +117,91 @@ def build_extensions(self): self.extensions = other_extensions super().build_extensions() - self.extensions = other_extensions + cmake_extensions - def build_cmake(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + debug_mode = os.getenv("DEBUG", "0") == "1" + build_type = "Debug" if debug_mode else "Release" - build_type = "Debug" if use_debug_mode() else "Release" + # Get PyTorch's cmake directory + torch_cmake_dir = os.path.join(torch.utils.cmake_prefix_path, "Torch") - from distutils.sysconfig import get_python_lib + # Try to find Ninja + try: + subprocess.check_output(['ninja', '--version']) + use_ninja = True + except (subprocess.SubprocessError, FileNotFoundError): + use_ninja = False + + # Build the cmake arguments + cmake_args = [ + f"-DCMAKE_BUILD_TYPE={build_type}", + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}", + f"-DTorch_DIR={torch_cmake_dir}", + f"-DUSE_CUDA={'ON' if torch.cuda.is_available() and CUDA_HOME else 'OFF'}", + ] - torch_dir = get_python_lib() + "/torch/share/cmake/Torch" + # Add CUDA architecture flags if CUDA is enabled + if torch.cuda.is_available() and CUDA_HOME: + # Get CUDA compute capability of the current GPU + capability = torch.cuda.get_device_capability() + arch_list = f"{capability[0]}.{capability[1]}" + cmake_args.append(f"-DTORCH_CUDA_ARCH_LIST={arch_list}") - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) + # Add Ninja generator if available + if use_ninja: + cmake_args += ["-GNinja"] + + build_temp = os.path.join(self.build_temp, ext.name) + if not os.path.exists(build_temp): + os.makedirs(build_temp) subprocess.check_call( - [ - "cmake", - ext.sourcedir, - "-DCMAKE_BUILD_TYPE=" + build_type, - "-DTORCHAO_BUILD_EXECUTORCH_OPS=OFF", - "-DTorch_DIR=" + torch_dir, - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, - ], - cwd=self.build_temp, + ["cmake", ext.sourcedir] + cmake_args, cwd=build_temp ) - subprocess.check_call(["cmake", "--build", "."], cwd=self.build_temp) - - -class CMakeExtension(Extension): - def __init__(self, name, sourcedir=""): - Extension.__init__(self, name, sources=[]) - self.sourcedir = os.path.abspath(sourcedir) + # Use ninja if available, otherwise default to standard build + if use_ninja: + subprocess.check_call(["ninja"], cwd=build_temp) + else: + subprocess.check_call( + ["cmake", "--build", ".", "--config", build_type], cwd=build_temp + ) def get_extensions(): - debug_mode = use_debug_mode() - if debug_mode: - print("Compiling in debug mode") - - if not torch.cuda.is_available(): - print( - "PyTorch GPU support is not available. Skipping compilation of CUDA extensions" - ) if CUDA_HOME is None and torch.cuda.is_available(): print("CUDA toolkit is not available. Skipping compilation of CUDA extensions") print( "If you'd like to compile CUDA extensions locally please install the cudatoolkit from https://anaconda.org/nvidia/cuda-toolkit" ) - use_cuda = torch.cuda.is_available() and CUDA_HOME is not None - extension = CUDAExtension if use_cuda else CppExtension - - extra_link_args = [] - extra_compile_args = { - "cxx": [f"-DPy_LIMITED_API={PY3_9_HEXCODE}"], - "nvcc": [ - "-O3" if not debug_mode else "-O0", - "-t=0", - ], - } - - if not IS_WINDOWS: - extra_compile_args["cxx"].extend( - ["-O3" if not debug_mode else "-O0", "-fdiagnostics-color=always"] - ) - - if debug_mode: - extra_compile_args["cxx"].append("-g") - extra_compile_args["nvcc"].append("-g") - extra_link_args.extend(["-O0", "-g"]) - else: - extra_compile_args["cxx"].extend( - ["/O2" if not debug_mode else "/Od", "/permissive-"] - ) - - if debug_mode: - extra_compile_args["cxx"].append("/ZI") - extra_compile_args["nvcc"].append("-g") - extra_link_args.append("/DEBUG") - - use_cutlass = False - if use_cuda and not IS_WINDOWS: - use_cutlass = True - cutlass_dir = os.path.join(third_party_path, "cutlass") - cutlass_include_dir = os.path.join(cutlass_dir, "include") - if use_cutlass: - extra_compile_args["nvcc"].extend( - [ - "-DTORCHAO_USE_CUTLASS", - "-I" + cutlass_include_dir, - ] - ) - - this_dir = os.path.dirname(os.path.curdir) - extensions_dir = os.path.join(this_dir, "torchao", "csrc") - sources = list(glob.glob(os.path.join(extensions_dir, "**/*.cpp"), recursive=True)) - - extensions_cuda_dir = os.path.join(extensions_dir, "cuda") - cuda_sources = list( - glob.glob(os.path.join(extensions_cuda_dir, "**/*.cu"), recursive=True) + # Check for experimental build conditions + build_torchao_experimental = ( + os.getenv("USE_CPP") == "1" + and platform.machine().startswith("arm64") + and platform.system() == "Darwin" ) - if use_cuda: - sources += cuda_sources - + use_cpp = os.getenv("USE_CPP") ext_modules = [] - if len(sources) > 0: + + if use_cpp != "0": ext_modules.append( - extension( + CMakeExtension( "torchao._C", - sources, - py_limited_api=True, - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, + sourcedir=".", ) ) - if build_torchao_experimental: - ext_modules.append( - CMakeExtension( - "torchao.experimental", - sourcedir="torchao/experimental", + if build_torchao_experimental: + ext_modules.append( + CMakeExtension( + "torchao.experimental", + sourcedir="torchao/experimental", + ) ) - ) return ext_modules - check_submodules() setup( @@ -298,7 +212,7 @@ def get_extensions(): package_data={ "torchao.kernel.configs": ["*.pkl"], }, - ext_modules=get_extensions() if use_cpp != "0" else None, + ext_modules=get_extensions(), extras_require={"dev": read_requirements("dev-requirements.txt")}, description="Package for applying ao techniques to GPU models", long_description=open("README.md").read(),