# Copyright 2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# Rust crates needed for the build, one "name-version" entry per line.
# The cargo eclass (inherited below) is expected to turn this list into
# ${CARGO_CRATE_URIS}, which is appended to SRC_URI further down.
# NOTE(review): presumably generated from the upstream Cargo.lock (e.g. with
# pycargoebuild); regenerate on version bumps rather than hand-editing -- confirm.
CRATES="
    addr2line-0.24.2
    adler2-2.0.1
    ahash-0.8.12
    aho-corasick-1.1.3
    anstream-0.6.20
    anstyle-1.0.11
    anstyle-parse-0.2.7
    anstyle-query-1.1.4
    anstyle-wincon-3.0.10
    autocfg-1.5.0
    backtrace-0.3.75
    base64-0.13.1
    bitflags-2.9.4
    bumpalo-3.19.0
    castaway-0.2.4
    cc-1.2.38
    cfg-if-1.0.3
    colorchoice-1.0.4
    compact_str-0.9.0
    console-0.15.11
    crossbeam-deque-0.8.6
    crossbeam-epoch-0.9.18
    crossbeam-utils-0.8.21
    darling-0.20.11
    darling_core-0.20.11
    darling_macro-0.20.11
    dary_heap-0.3.8
    derive_builder-0.20.2
    derive_builder_core-0.20.2
    derive_builder_macro-0.20.2
    either-1.15.0
    encode_unicode-1.0.0
    env_filter-0.1.3
    env_logger-0.11.8
    errno-0.3.14
    esaxx-rs-0.1.10
    fastrand-2.3.0
    find-msvc-tools-0.1.2
    fnv-1.0.7
    futures-0.3.31
    futures-channel-0.3.31
    futures-core-0.3.31
    futures-executor-0.3.31
    futures-io-0.3.31
    futures-macro-0.3.31
    futures-sink-0.3.31
    futures-task-0.3.31
    futures-util-0.3.31
    getrandom-0.3.3
    gimli-0.31.1
    heck-0.5.0
    ident_case-1.0.1
    indicatif-0.17.11
    indoc-2.0.6
    io-uring-0.7.10
    is_terminal_polyfill-1.70.1
    itertools-0.14.0
    itoa-1.0.15
    jiff-0.2.15
    jiff-static-0.2.15
    js-sys-0.3.80
    libc-0.2.175
    linux-raw-sys-0.11.0
    log-0.4.28
    macro_rules_attribute-0.2.2
    macro_rules_attribute-proc_macro-0.2.2
    matrixmultiply-0.3.10
    memchr-2.7.5
    memoffset-0.9.1
    minimal-lexical-0.2.1
    miniz_oxide-0.8.9
    mio-1.0.4
    monostate-0.1.16
    monostate-impl-0.1.16
    ndarray-0.16.1
    nom-7.1.3
    number_prefix-0.4.0
    num-complex-0.4.6
    num-integer-0.1.46
    numpy-0.25.0
    num-traits-0.2.19
    object-0.36.7
    once_cell-1.21.3
    once_cell_polyfill-1.70.1
    onig-6.5.1
    onig_sys-69.9.1
    paste-1.0.15
    pin-project-lite-0.2.16
    pin-utils-0.1.0
    pkg-config-0.3.32
    portable-atomic-1.11.1
    portable-atomic-util-0.2.4
    ppv-lite86-0.2.21
    proc-macro2-1.0.101
    pyo3-0.25.1
    pyo3-async-runtimes-0.25.0
    pyo3-build-config-0.25.1
    pyo3-ffi-0.25.1
    pyo3-macros-0.25.1
    pyo3-macros-backend-0.25.1
    quote-1.0.40
    rand-0.9.2
    rand_chacha-0.9.0
    rand_core-0.9.3
    rawpointer-0.2.1
    rayon-1.11.0
    rayon-cond-0.4.0
    rayon-core-1.13.0
    r-efi-5.3.0
    regex-1.11.2
    regex-automata-0.4.10
    regex-syntax-0.8.6
    rustc-demangle-0.1.26
    rustc-hash-2.1.1
    rustix-1.1.2
    rustversion-1.0.22
    ryu-1.0.20
    serde-1.0.225
    serde_core-1.0.225
    serde_derive-1.0.225
    serde_json-1.0.145
    shlex-1.3.0
    signal-hook-registry-1.4.6
    slab-0.4.11
    smallvec-1.15.1
    spm_precompiled-0.1.4
    static_assertions-1.1.0
    strsim-0.11.1
    syn-2.0.106
    target-lexicon-0.13.3
    tempfile-3.22.0
    thiserror-2.0.16
    thiserror-impl-2.0.16
    tokenizers-0.22.1
    tokio-1.47.1
    tokio-macros-2.5.0
    unicode_categories-0.1.1
    unicode-ident-1.0.19
    unicode-normalization-alignments-0.1.12
    unicode-segmentation-1.12.0
    unicode-width-0.2.1
    unindent-0.2.4
    utf8parse-0.2.2
    version_check-0.9.5
    wasi-0.11.1+wasi-snapshot-preview1
    wasi-0.14.7+wasi-0.2.4
    wasip2-1.0.1+wasi-0.2.4
    wasm-bindgen-0.2.103
    wasm-bindgen-backend-0.2.103
    wasm-bindgen-macro-0.2.103
    wasm-bindgen-macro-support-0.2.103
    wasm-bindgen-shared-0.2.103
    web-time-1.1.0
    windows_aarch64_gnullvm-0.52.6
    windows_aarch64_gnullvm-0.53.0
    windows_aarch64_msvc-0.52.6
    windows_aarch64_msvc-0.53.0
    windows_i686_gnu-0.52.6
    windows_i686_gnu-0.53.0
    windows_i686_gnullvm-0.52.6
    windows_i686_gnullvm-0.53.0
    windows_i686_msvc-0.52.6
    windows_i686_msvc-0.53.0
    windows-link-0.1.3
    windows-link-0.2.0
    windows-sys-0.59.0
    windows-sys-0.60.2
    windows-sys-0.61.0
    windows-targets-0.52.6
    windows-targets-0.53.3
    windows_x86_64_gnu-0.52.6
    windows_x86_64_gnu-0.53.0
    windows_x86_64_gnullvm-0.52.6
    windows_x86_64_gnullvm-0.53.0
    windows_x86_64_msvc-0.52.6
    windows_x86_64_msvc-0.53.0
    wit-bindgen-0.46.0
    zerocopy-0.8.27
    zerocopy-derive-0.8.27
    "
# Eclass configuration; these must be set before the `inherit` line.
# The package installs a compiled extension module (see QA_FLAGS_IGNORED at
# the end of the file), so declare it via DISTUTILS_EXT as distutils-r1
# expects for packages that build native code.
DISTUTILS_EXT=1
# Build the wheel through the maturin PEP 517 backend.
DISTUTILS_USE_PEP517=maturin
# Brace expansion yields python3_12, python3_13 and python3_14.
PYTHON_COMPAT=( python3_{12..14} )

inherit cargo distutils-r1 pypi

# Package metadata shown by the package manager.
DESCRIPTION="Fast, efficient, Rust-backed tokenizers for Python (HuggingFace)"
HOMEPAGE="https://github.com/huggingface/tokenizers https://pypi.org/project/tokenizers/"
# Append the crate tarballs to whatever SRC_URI the inherited eclasses
# have already set up.
SRC_URI+="
	${CARGO_CRATE_URIS}
"

# NOTE(review): only one license is listed here; the crates in CRATES are
# presumably compiled into the installed extension, so their licenses may
# need to be listed as well -- confirm.
LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~amd64 ~arm64 ~x86"

# Optional features; python_compile() collects the enabled flag names into
# MATURIN_FEATURES.
IUSE="onig hf-hub indicatif fancy-regex"

# huggingface_hub is only required when USE=hf-hub is enabled.
RDEPEND="
    hf-hub? ( dev-python/huggingface_hub[${PYTHON_USEDEP}] )
"
DEPEND="${RDEPEND}"

# Prepare the unpacked sources.
#
# Delegates to distutils-r1_src_prepare instead of plain `default`: this
# ebuild overrides the src_prepare phase exported by distutils-r1, and the
# eclass implementation is what applies patches / user patches and performs
# the eclass's own preparation steps.  Calling only `default` would skip
# the eclass-specific part.
src_prepare() {
    distutils-r1_src_prepare

    # Drop the upstream .github directory from the source tree.
    rm -rf .github || die
}

# Per-implementation compile step.
#
# Gathers the names of the enabled USE flags (in the fixed order onig,
# hf-hub, indicatif, fancy-regex), exports them space-separated as
# MATURIN_FEATURES, then hands over to the eclass compile routine.
#
# NOTE(review): distutils-r1 documents DISTUTILS_ARGS as the channel for
# passing options to the maturin backend; confirm that MATURIN_FEATURES is
# actually consumed by the build.
python_compile() {
    local flag
    local enabled=()

    for flag in onig hf-hub indicatif fancy-regex; do
        if use "${flag}"; then
            enabled+=( "${flag}" )
        fi
    done

    # "${enabled[*]}" joins the entries with a single space (default IFS).
    export MATURIN_FEATURES="${enabled[*]}"

    distutils-r1_python_compile
}

#python_install() {
#    cd bindings/python || die
#    distutils-r1_python_install
#}

# distutils_enable_tests sets up the test phase for the pytest runner,
# including the python_test() implementation.  The hand-written
# python_test() that used to precede this call ran the *install* step
# instead of any tests and was redefined by this call anyway, so it has
# been dropped.
distutils_enable_tests pytest

# Exempt the shared objects installed under site-packages/tokenizers/ from
# the "flags ignored" QA check.
# NOTE(review): presumably needed because the Rust-built .so triggers
# false positives in that check -- confirm.
QA_FLAGS_IGNORED=".*site-packages/tokenizers/.*\.so"