# Copyright 2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# Defined ahead of `inherit`; also used below as the minimum version of the
# rocm? dependencies.
ROCM_VERSION="6.3"

inherit cmake cuda rocm linux-info systemd

DESCRIPTION="Diffusion model(SD,Flux,Wan,Qwen Image,Z-Image,...) inference in pure C/C++"
HOMEPAGE="https://github.com/leejet/stable-diffusion.cpp"

# Pinned to master-669-2d40a8b (2026-06-02). The commit hash is kept in a single
# variable so that SRC_URI and S cannot drift apart on a version bump.
SD_COMMIT="2d40a8b2adcdf8b5b0ca0535f3bb7801b6ba13e5"
# ggml is a git submodule (leejet/ggml fork) -- GitHub tarballs don't include
# submodules, so it is fetched as a second archive.
GGML_COMMIT="0ce7ad348a3151e1da9f65d962044546bcaad421"
SRC_URI="
    https://github.com/leejet/stable-diffusion.cpp/archive/${SD_COMMIT}.tar.gz -> ${P}.gh.tar.gz
    https://github.com/leejet/ggml/archive/${GGML_COMMIT}.tar.gz -> ${P}-ggml.gh.tar.gz
"
S="${WORKDIR}/stable-diffusion.cpp-${SD_COMMIT}"

LICENSE="MIT"
SLOT="0"
KEYWORDS="~amd64"

# Optional backends/features, followed by the x86 CPU feature flags that are
# forwarded to ggml in src_configure.
IUSE="
    openblas blis rocm cuda opencl vulkan flexiblas webm webp wmma
    cpu_flags_x86_avx cpu_flags_x86_avx2 cpu_flags_x86_fma3 cpu_flags_x86_f16c
    cpu_flags_x86_bmi2 cpu_flags_x86_avx_vnni cpu_flags_x86_avx512f
    cpu_flags_x86_avx512vbmi cpu_flags_x86_avx512_vnni
    cpu_flags_x86_avx512_bf16
"

# - at most one BLAS provider may be selected
# - webm requires webp; wmma requires rocm
# - rocm pulls in ROCM_REQUIRED_USE from rocm.eclass, so that the target list
#   handed to CMake in src_configure is not empty
#   (NOTE(review): per the eclass documentation this demands at least one
#   AMDGPU target -- confirm against the eclass version in the tree)
# - each CPU extension requires the one it builds upon
REQUIRED_USE="
    ?? ( openblas blis flexiblas )
    webm? ( webp )
    wmma? ( rocm )
    rocm? ( ${ROCM_REQUIRED_USE} )
    cpu_flags_x86_avx2? ( cpu_flags_x86_avx )
    cpu_flags_x86_avx512f? ( cpu_flags_x86_avx2 )
    cpu_flags_x86_avx512vbmi? ( cpu_flags_x86_avx512f )
    cpu_flags_x86_avx512_vnni? ( cpu_flags_x86_avx512f )
    cpu_flags_x86_avx512_bf16? ( cpu_flags_x86_avx512f )
"

# Libraries required both while building and at run time.
CDEPEND="
    openblas? ( sci-libs/openblas:= )
    blis? ( sci-libs/blis:= )
    flexiblas? ( sci-libs/flexiblas:= )
    rocm? (
        >=dev-util/hip-${ROCM_VERSION}:=
        >=sci-libs/hipBLAS-${ROCM_VERSION}:=
        wmma? (
            >=sci-libs/rocWMMA-${ROCM_VERSION}:=
        )
    )
    cuda? ( dev-util/nvidia-cuda-toolkit:= )
    webp? ( media-libs/libwebp:= )
    webm? ( media-libs/libwebm )
"
# Build time only: API headers of the optional compute backends.
DEPEND="${CDEPEND}
    opencl? ( dev-util/opencl-headers )
    vulkan? ( dev-util/vulkan-headers )
"
# Run time only: the service account (used by pkg_preinst for directory
# ownership) and the loaders for the optional compute backends.
RDEPEND="${CDEPEND}
    acct-group/sd-cpp
    acct-user/sd-cpp
    dev-python/numpy
    opencl? ( dev-libs/opencl-icd-loader )
    vulkan? ( media-libs/vulkan-loader )
"
# NOTE(review): shaderc is presumably only needed to compile the Vulkan
# backend's shaders, hence the USE conditional -- confirm that no other
# backend invokes it.
BDEPEND="vulkan? ( media-libs/shaderc )"

# Warns when ROCm support is requested but the kernel configuration that was
# found does not enable HSA_AMD_SVM. Does nothing without USE=rocm, and stays
# silent when no kernel version/configuration can be located.
pkg_setup() {
    use rocm || return 0

    linux-info_pkg_setup

    # Without a readable kernel config there is nothing to check.
    if ! { linux-info_get_any_version && linux_config_exists; }; then
        return 0
    fi

    linux_chkconfig_present HSA_AMD_SVM ||
        ewarn "To use ROCm/HIP, you need to have HSA_AMD_SVM option enabled in your kernel."
}

# Places the separately unpacked ggml sources where the build expects the
# submodule, then runs the CUDA (only with USE=cuda) and CMake preparation.
src_prepare() {
    local ggml_dir="${S}/ggml"

    if [[ -d ${ggml_dir} ]]; then
        # Only an empty placeholder may be replaced. If a non-empty directory
        # were left in place, mv would nest the sources as
        # ggml/ggml-<commit> and still report success.
        rmdir "${ggml_dir}" || die "${ggml_dir} exists and is not empty"
    fi
    mv "${WORKDIR}/ggml-${GGML_COMMIT}" "${ggml_dir}" || die "Failed to place ggml submodule"

    use cuda && cuda_src_prepare
    cmake_src_prepare
}

# Assembles the CMake arguments from the enabled USE flags and runs the CMake
# configure step. CPU features follow the cpu_flags_x86_* flags, and BLAS,
# CUDA and ROCm settings are appended only when the matching flag is on.
src_configure() {
    # Private library directory (also used as RPATH) to avoid clashing with
    # whisper.cpp.
    local sd_libdir="${EPREFIX}/usr/$(get_libdir)/stable-diffusion.cpp"

    local mycmakeargs=(
        # -- Build settings --
        -DCMAKE_SKIP_BUILD_RPATH=ON
        -DSD_BUILD_SHARED_LIBS=OFF
        -DSD_SERVER_BUILD_FRONTEND=OFF  # requires pnpm and network access
        -DGENTOO_REMOVE_CMAKE_BLAS_HACK=ON

        # -- CPU feature selection (no -march=native) --
        -DGGML_NATIVE=0
        -DGGML_SSE42=ON
        -DGGML_AVX=$(usex cpu_flags_x86_avx)
        -DGGML_AVX2=$(usex cpu_flags_x86_avx2)
        -DGGML_BMI2=$(usex cpu_flags_x86_bmi2)
        -DGGML_FMA=$(usex cpu_flags_x86_fma3)
        -DGGML_F16C=$(usex cpu_flags_x86_f16c)
        -DGGML_AVX_VNNI=$(usex cpu_flags_x86_avx_vnni)
        -DGGML_AVX512=$(usex cpu_flags_x86_avx512f)
        -DGGML_AVX512_VBMI=$(usex cpu_flags_x86_avx512vbmi)
        -DGGML_AVX512_VNNI=$(usex cpu_flags_x86_avx512_vnni)
        -DGGML_AVX512_BF16=$(usex cpu_flags_x86_avx512_bf16)

        # -- Networking / optional features --
        -DGGML_RPC=ON
        -DSD_CUDA=$(usex cuda)
        -DSD_OPENCL=$(usex opencl)
        -DSD_WEBP=$(usex webp)
        -DSD_USE_SYSTEM_WEBP=$(usex webp)
        -DSD_WEBM=$(usex webm)
        -DSD_USE_SYSTEM_WEBM=$(usex webm)
        -DSD_VULKAN=$(usex vulkan)

        # -- Install paths --
        -DCMAKE_INSTALL_LIBDIR="${sd_libdir}"
        -DCMAKE_INSTALL_RPATH="${sd_libdir}"
    )

    # -- BLAS vendor selection --
    # REQUIRED_USE permits at most one of these flags, so the first match is
    # the only match.
    local blas_vendor=
    if use openblas; then
        blas_vendor=OpenBLAS
    elif use blis; then
        blas_vendor=FLAME
    elif use flexiblas; then
        blas_vendor=FlexiBLAS
    fi
    if [[ -n ${blas_vendor} ]]; then
        mycmakeargs+=(
            -DGGML_BLAS=ON -DGGML_BLAS_VENDOR="${blas_vendor}"
        )
    fi

    # -- CUDA --
    if use cuda; then
        local -x CUDAHOSTCXX="$(cuda_gccdir)"
        # tries to recreate dev symlinks
        cuda_add_sandbox
        addpredict "/dev/char/"
    fi

    # -- ROCm/HIP --
    if use rocm; then
        rocm_use_hipcc

        # Queried once and quoted, so each option receives the target list as
        # a single word regardless of its content.
        local gpu_targets
        gpu_targets="$(get_amdgpu_flags)"
        mycmakeargs+=(
            -DSD_HIPBLAS=ON
            -DAMDGPU_TARGETS="${gpu_targets}"
            -DGPU_TARGETS="${gpu_targets}"
            -DGGML_HIP_ROCWMMA_FATTN=$(usex wmma)
        )
    fi

    cmake_src_configure
}

# Installs the CMake build, prunes the headers and static archives from the
# image, and adds the sd-server configuration file, systemd unit and the
# model directory.
src_install() {
    cmake_src_install

    # avoid clashing with whisper.cpp
    rm -rf "${ED}/usr/include" || die "Failed to remove installed headers"

    # Remove every static archive from the image.
    find "${ED}" -name "*.a" -delete || die

    # -- systemd service for the sd-server HTTP backend --
    insinto /etc/sd-cpp
    doins "${FILESDIR}/sd-server.conf"

    systemd_dounit "${FILESDIR}/sd-server.service"

    keepdir /var/lib/sd-cpp/models
}

# Makes sure /var/lib/sd-cpp and its models/ subdirectory are kept in the
# image, owned by sd-cpp:sd-cpp and restricted to mode 0750.
# NOTE(review): keepdir/fowners/fperms are image-install helpers that are
# usually called from src_install -- confirm they are acceptable in this phase.
pkg_preinst() {
    local data_dirs=(
        /var/lib/sd-cpp
        /var/lib/sd-cpp/models
    )

    keepdir "${data_dirs[@]}"
    fowners sd-cpp:sd-cpp "${data_dirs[@]}"
    fperms 0750 "${data_dirs[@]}"
}

# Prints the post-install notes: the configuration file to edit, the settings
# that have to be filled in, how to enable the service and, for ROCm/CUDA
# builds, a reminder about device-node access for the sd-cpp user.
pkg_postinst() {
    local -a notes=(
        ""
        "Before starting the sd-server service you MUST configure:"
        "  /etc/sd-cpp/sd-server.conf"
        ""
        "At minimum, set:"
        "  SD_MODEL   - path to your diffusion model (.gguf / .safetensors / .pt)"
        "  SD_THREADS - number of physical CPU cores (NOT hyperthreads)"
        ""
        "Then:  systemctl enable --now sd-server"
        ""
    )

    # The GPU hint is appended for either GPU backend.
    if use rocm || use cuda; then
        notes+=(
            "GPU users: ensure the sd-cpp user has access to the render/video"
            "device nodes (the acct-user ebuild adds render+video groups)."
            ""
        )
    fi

    local note
    for note in "${notes[@]}"; do
        elog "${note}"
    done
}