# Copyright 2022-2026 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 # ROCm implementation of caffe2/PyTorch backend EAPI=8 PYTHON_COMPAT=( python3_{11..14} ) ROCM_VERSION=6.1 inherit python-single-r1 cmake flag-o-matic prefix rocm # Source package is pytorch, not caffe2-rocm MY_PN=caffe2 MYPN=pytorch MYP=${MYPN}-${PV} CK_COMMIT=7fe50dc3da2069d6645d9deb8c017a876472a977 CK_P=composable_kernel-${CK_COMMIT:0:8} DESCRIPTION="A deep learning framework (ROCm backend)" HOMEPAGE="https://pytorch.org/" SRC_URI=" https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz -> ${MYP}.tar.gz https://github.com/ROCm/composable_kernel/archive/${CK_COMMIT}.tar.gz -> ${CK_P}.tar.gz " S="${WORKDIR}"/${MYP} LICENSE="BSD" SLOT="0" KEYWORDS="~amd64" IUSE="cusparselt distributed fbgemm flash gloo kineto memefficient mimalloc mkl mpi nccl nnpack +numpy onednn openblas opencl openmp qnnpack xnnpack" RESTRICT="test" REQUIRED_USE=" ${PYTHON_REQUIRED_USE} mpi? ( distributed ) gloo? ( distributed ) ${ROCM_REQUIRED_USE} memefficient? ( flash ) " RDEPEND=" ${PYTHON_DEPS} app-eselect/eselect-caffe2 dev-cpp/abseil-cpp:= dev-cpp/gflags:= >=dev-cpp/glog-0.5.0:= >=dev-libs/cpuinfo-2025.11.14 dev-libs/libfmt:= dev-libs/protobuf:= dev-libs/sleef sci-ml/onnx virtual/lapack nccl? ( >=dev-libs/rccl-6.3:= =dev-util/hip-6.3:= =dev-util/roctracer-6.3:= =sci-libs/hipBLAS-6.3:= =sci-libs/hipBLASLt-6.3:= =sci-libs/hipFFT-6.3:= =sci-libs/hipRAND-6.3:= =sci-libs/hipSOLVER-6.3:= =sci-libs/hipSPARSE-6.3:= =sci-libs/miopen-6.3:= =sci-libs/rocBLAS-6.3:= =sci-libs/rocRAND-6.3:= =sci-libs/rocSOLVER-6.3:= =dev-util/rocm-smi-6.3:= =dev-util/amdsmi-6.3:= =sci-libs/hipsparselt-6.3:= =sci-ml/FBGEMM-1.4 ) gloo? ( >=sci-ml/gloo-2025.06.04[rocm] ) kineto? ( ~sci-ml/kineto-0.4.0_p20260323 ) mimalloc? ( dev-libs/mimalloc ) mpi? ( virtual/mpi ) nnpack? ( sci-ml/NNPACK dev-libs/pthreadpool ) numpy? ( $(python_gen_cond_dep 'dev-python/numpy[${PYTHON_USEDEP}]') ) onednn? 
( sci-ml/oneDNN ) opencl? ( virtual/opencl ) qnnpack? ( !sci-libs/QNNPACK sci-ml/gemmlowp dev-libs/pthreadpool ) xnnpack? ( >=sci-ml/XNNPACK-2024.11 dev-libs/pthreadpool ) mkl? ( sci-libs/mkl ) openblas? ( sci-libs/openblas ) " DEPEND=" ${RDEPEND} dev-cpp/nlohmann_json dev-libs/flatbuffers dev-libs/FXdiv dev-libs/pocketfft dev-libs/psimd sci-ml/FP16 $(python_gen_cond_dep ' =sci-libs/hipCUB-6.3:= =sci-libs/rocPRIM-6.3:= =sci-libs/rocThrust-6.3:= /dev/null || die flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die popd > /dev/null || die hprefixify \ aten/CMakeLists.txt \ caffe2/CMakeLists.txt \ cmake/Metal.cmake \ cmake/Modules/*.cmake \ cmake/Modules_CUDA_fix/FindCUDNN.cmake \ cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake \ cmake/Modules_CUDA_fix/upstream/FindPackageHandleStandardArgs.cmake \ cmake/public/LoadHIP.cmake \ cmake/public/cuda.cmake \ cmake/Dependencies.cmake \ torch/CMakeLists.txt \ CMakeLists.txt # ROCm-specific patches sed -e "s:/opt/rocm:/usr:" \ -e "s:lib/cmake:$(get_libdir)/cmake:g" \ -i cmake/public/LoadHIP.cmake || die sed -e "s:third_party/composable_kernel:../composable_kernel-${CK_COMMIT}:g" \ -i aten/src/ATen/CMakeLists.txt || die pushd "${WORKDIR}/composable_kernel-${CK_COMMIT}" > /dev/null || die eapply "${FILESDIR}"/composable-kernel-7fe50dc-expand-isa.patch popd > /dev/null || die sed -e 's/std::memcpy/memcpy/g' -i torch/headeronly/util/Half.h || die ebegin "HIPifying cuda sources" FBCODE_BUILD_TOOL="buck" ${EPYTHON} tools/amd_build/build_amd.py || die eend $? 
}

# Configure the CMake build for a ROCm-only backend: CUDA is disabled, most
# third-party libraries come from the system, and optional features follow the
# USE flags.
src_configure() {
	# GPU targets for the HIP build; get_amdgpu_flags is presumably provided by
	# the inherited rocm eclass.
	export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"

	if use memefficient; then
		export AOTRITON_INSTALLED_PREFIX="${ESYSROOT}/usr"
	fi

	# NOTE(review): CAFFE2_PREFIX is not defined in the visible part of this
	# file -- confirm it is set before this phase runs.
	local mycmakeargs=(
		-DCMAKE_INSTALL_PREFIX="${EPREFIX}${CAFFE2_PREFIX}"
		-DCMAKE_INSTALL_RPATH="${EPREFIX}${CAFFE2_PREFIX}/$(get_libdir)"
		-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
		-DBUILD_CUSTOM_PROTOBUF=OFF
		-DBUILD_TEST=OFF
		-DLIBSHM_INSTALL_LIB_SUBDIR="${EPREFIX}${CAFFE2_PREFIX}/$(get_libdir)"
		-DPython_EXECUTABLE="${PYTHON}"
		-DTORCH_INSTALL_LIB_DIR="${EPREFIX}${CAFFE2_PREFIX}/$(get_libdir)"
		-DUSE_CCACHE=OFF
		-DUSE_CUDA=OFF
		-DUSE_ROCM=ON
		-DUSE_DISTRIBUTED=$(usex distributed)
		-DUSE_FBGEMM=$(usex fbgemm)
		-DUSE_FLASH_ATTENTION=$(usex flash)
		-DUSE_GFLAGS=ON
		-DUSE_GLOG=ON
		-DUSE_GLOO=$(usex gloo)
		-DUSE_ITT=OFF
		-DUSE_KINETO=$(usex kineto)
		-DUSE_KLEIDIAI=OFF
		-DUSE_MAGMA=OFF
		-DUSE_MEM_EFF_ATTENTION=$(usex memefficient)
		-DUSE_MIMALLOC=$(usex mimalloc)
		-DUSE_MKLDNN=$(usex onednn)
		-DUSE_MPI=$(usex mpi)
		-DUSE_NCCL=$(usex nccl)
		-DUSE_SYSTEM_NCCL=ON
		-DUSE_NNPACK=$(usex nnpack)
		-DUSE_NUMA=OFF
		-DUSE_NUMPY=$(usex numpy)
		-DUSE_OPENCL=$(usex opencl)
		-DUSE_OPENMP=$(usex openmp)
		-DUSE_PYTORCH_QNNPACK=$(usex qnnpack)
		-DUSE_PYTORCH_METAL=OFF
		-DUSE_SYSTEM_CPUINFO=ON
		-DUSE_SYSTEM_EIGEN_INSTALL=ON
		-DUSE_SYSTEM_FP16=ON
		-DUSE_SYSTEM_FXDIV=ON
		-DUSE_SYSTEM_GLOO=ON
		-DUSE_SYSTEM_NVTX=ON
		-DUSE_SYSTEM_ONNX=ON
		-DUSE_SYSTEM_PSIMD=ON
		-DUSE_SYSTEM_PTHREADPOOL=ON
		-DUSE_SYSTEM_PYBIND11=ON
		-DUSE_SYSTEM_SLEEF=ON
		-DUSE_SYSTEM_XNNPACK=$(usex xnnpack)
		-DUSE_TENSORPIPE=OFF
		-DUSE_UCC=OFF
		-DUSE_VALGRIND=OFF
		-DUSE_XNNPACK=$(usex xnnpack)
		-DUSE_XPU=OFF
		-DCMAKE_REQUIRE_FIND_PACKAGE_HIP=ON
		# Inverted on purpose: the hipsparselt lookup is disabled unless
		# USE=cusparselt is set.
		-DCMAKE_DISABLE_FIND_PACKAGE_hipsparselt=$(usex !cusparselt)
		-DUSE_ROCM_CK_SDPA=OFF
		-Wno-dev
	)

	# BLAS provider: mkl takes precedence over openblas; with neither flag set
	# the generic BLAS is requested with an empty library list.
	if use mkl; then
		mycmakeargs+=(-DBLAS=MKL)
	elif use openblas; then
		mycmakeargs+=(-DBLAS=OpenBLAS)
	else
		mycmakeargs+=(-DBLAS=Generic -DBLAS_LIBRARIES=)
	fi

	# Pre-seed the MKLDNN result variables so the build uses the system dnnl.
	if use onednn; then
		mycmakeargs+=(
			-DMKLDNN_FOUND=ON
			-DMKLDNN_LIBRARIES=dnnl
			-DMKLDNN_INCLUDE_DIR="${ESYSROOT}/usr/include/oneapi/dnnl"
		)
	fi

	append-cxxflags -Wno-deprecated-declarations -Wno-unused-result -Wno-unused-value
	cmake_src_configure
}

# Build with the PyTorch version variables set for the duration of the CMake
# build.
src_compile() {
	PYTORCH_BUILD_VERSION=${PV} \
	PYTORCH_BUILD_NUMBER=0 \
	cmake_src_compile
}

# Install the CMake build, keep its cache for later reference, and lay down a
# minimal torch Python module skeleton that links back into the caffe2 prefix.
src_install() {
	cmake_src_install

	# CMake cache for pytorch-rocm
	insinto "/var/lib/caffe2-rocm"
	doins "${BUILD_DIR}"/CMakeCache.txt

	# Python torch module to pytorch prefix: stage only version.py in a clean
	# scratch directory.
	rm -rf python || die
	mkdir -p python/torch || die
	cp torch/version.py python/torch/ || die

	# NOTE(review): PYTORCH_PREFIX is not defined in the visible part of this
	# file -- confirm it is set before this phase runs.
	local torch_dest="${PYTORCH_PREFIX}"
	insinto "${torch_dest}"
	doins -r python/torch

	# Create required subdirs in torch module
	dodir "${torch_dest}/torch/bin"
	dodir "${torch_dest}/torch/lib"
	dodir "${torch_dest}/torch/include"

	dosym "${CAFFE2_PREFIX}/include/torch" "${torch_dest}/torch/include/torch"
	dosym "${CAFFE2_PREFIX}/bin/torch_shm_manager" "${torch_dest}/torch/bin/torch_shm_manager"
	dosym "${CAFFE2_PREFIX}/$(get_libdir)/libtorch_global_deps.so" "${torch_dest}/torch/lib/libtorch_global_deps.so"
}

# Select the rocm caffe2 backend when no backend is currently selected.
pkg_postinst() {
	local active
	# Assumes `eselect caffe2 show` prints nothing or exactly "(unset)" when no
	# backend is selected -- TODO confirm against the eselect module.
	active=$(eselect caffe2 show 2>/dev/null)
	if [[ "${active}" == "(unset)" || -z "${active}" ]]; then
		# Report success only when the selection actually worked; a failure here
		# is not fatal because the package is already merged.
		if eselect caffe2 set rocm; then
			elog "caffe2 backend set to: rocm"
		else
			ewarn "Could not select the rocm caffe2 backend; run 'eselect caffe2 set rocm' manually"
		fi
	fi
}