# Copyright 2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# Rust crate dependencies, one "name@version" entry per line.
# The list is defined ahead of `inherit cargo`; the resulting
# ${CARGO_CRATE_URIS} is what SRC_URI references to fetch every crate.
# NOTE(review): presumably generated from upstream's Cargo.lock (e.g. with
# pycargoebuild) -- regenerate on version bumps rather than hand-editing;
# confirm against the tool actually used.
CRATES="
	ahash@0.8.12
	aho-corasick@1.1.4
	anstream@0.6.21
	anstyle@1.0.13
	anstyle-parse@0.2.7
	anstyle-query@1.1.5
	anstyle-wincon@3.0.11
	autocfg@1.5.0
	base64@0.13.1
	bitflags@2.10.0
	bumpalo@3.19.0
	castaway@0.2.4
	cc@1.2.48
	cfg-if@1.0.4
	colorchoice@1.0.4
	compact_str@0.9.0
	console@0.16.1
	crossbeam-deque@0.8.6
	crossbeam-epoch@0.9.18
	crossbeam-utils@0.8.21
	darling@0.20.11
	darling_core@0.20.11
	darling_macro@0.20.11
	dary_heap@0.3.8
	derive_builder@0.20.2
	derive_builder_core@0.20.2
	derive_builder_macro@0.20.2
	either@1.15.0
	encode_unicode@1.0.0
	env_filter@0.1.4
	env_logger@0.11.8
	errno@0.3.14
	esaxx-rs@0.1.10
	fastrand@2.3.0
	find-msvc-tools@0.1.5
	fnv@1.0.7
	futures@0.3.31
	futures-channel@0.3.31
	futures-core@0.3.31
	futures-executor@0.3.31
	futures-io@0.3.31
	futures-macro@0.3.31
	futures-sink@0.3.31
	futures-task@0.3.31
	futures-util@0.3.31
	getrandom@0.3.4
	heck@0.5.0
	ident_case@1.0.1
	indicatif@0.18.3
	indoc@2.0.7
	is_terminal_polyfill@1.70.2
	itertools@0.14.0
	itoa@1.0.15
	jiff@0.2.16
	jiff-static@0.2.16
	js-sys@0.3.83
	libc@0.2.177
	linux-raw-sys@0.11.0
	log@0.4.28
	macro_rules_attribute@0.2.2
	macro_rules_attribute-proc_macro@0.2.2
	matrixmultiply@0.3.10
	memchr@2.7.6
	memoffset@0.9.1
	minimal-lexical@0.2.1
	mio@1.1.0
	monostate@0.1.18
	monostate-impl@0.1.18
	ndarray@0.16.1
	nom@7.1.3
	num-complex@0.4.6
	num-integer@0.1.46
	numpy@0.26.0
	num-traits@0.2.19
	once_cell@1.21.3
	once_cell_polyfill@1.70.2
	onig@6.5.1
	onig_sys@69.9.1
	paste@1.0.15
	pin-project-lite@0.2.16
	pin-utils@0.1.0
	pkg-config@0.3.32
	portable-atomic@1.11.1
	portable-atomic-util@0.2.4
	ppv-lite86@0.2.21
	proc-macro2@1.0.103
	pyo3@0.26.0
	pyo3-async-runtimes@0.26.0
	pyo3-build-config@0.26.0
	pyo3-ffi@0.26.0
	pyo3-macros@0.26.0
	pyo3-macros-backend@0.26.0
	quote@1.0.42
	rand@0.9.2
	rand_chacha@0.9.0
	rand_core@0.9.3
	rawpointer@0.2.1
	rayon@1.11.0
	rayon-cond@0.4.0
	rayon-core@1.13.0
	r-efi@5.3.0
	regex@1.12.2
	regex-automata@0.4.13
	regex-syntax@0.8.8
	rustc-hash@2.1.1
	rustix@1.1.2
	rustversion@1.0.22
	ryu@1.0.20
	serde@1.0.228
	serde_core@1.0.228
	serde_derive@1.0.228
	serde_json@1.0.145
	shlex@1.3.0
	signal-hook-registry@1.4.7
	slab@0.4.11
	smallvec@1.15.1
	spm_precompiled@0.1.4
	static_assertions@1.1.0
	strsim@0.11.1
	syn@2.0.111
	target-lexicon@0.13.3
	tempfile@3.23.0
	thiserror@2.0.17
	thiserror-impl@2.0.17
	tokio@1.48.0
	tokio-macros@2.6.0
	unicode_categories@0.1.1
	unicode-ident@1.0.22
	unicode-normalization-alignments@0.1.12
	unicode-segmentation@1.12.0
	unicode-width@0.2.2
	unindent@0.2.4
	unit-prefix@0.5.2
	utf8parse@0.2.2
	version_check@0.9.5
	wasi@0.11.1+wasi-snapshot-preview1
	wasip2@1.0.1+wasi-0.2.4
	wasm-bindgen@0.2.106
	wasm-bindgen-macro@0.2.106
	wasm-bindgen-macro-support@0.2.106
	wasm-bindgen-shared@0.2.106
	web-time@1.1.0
	windows-link@0.2.1
	windows-sys@0.61.2
	wit-bindgen@0.46.0
	zerocopy@0.8.31
	zerocopy-derive@0.8.31
"

# Eclass configuration; everything in this group is set before `inherit`
# so that distutils-r1 and pypi see it.
# DISTUTILS_EXT=1 declares that the package installs a compiled extension
# module (see the *.so pattern in QA_FLAGS_IGNORED).
DISTUTILS_EXT=1
# Build the wheel through the maturin PEP 517 backend.
DISTUTILS_USE_PEP517=maturin
PYTHON_COMPAT=( python3_{11..13} )
# Upstream project name on PyPI. Kept explicit so the generated sdist URL
# is the same "tokenizers" URL as before, independent of ${PN}.
PYPI_PN=tokenizers

inherit cargo distutils-r1 pypi

DESCRIPTION="Fast State-of-the-Art Tokenizers optimized for Research and Production"
HOMEPAGE="https://github.com/huggingface/tokenizers"
# pypi.eclass pre-populates SRC_URI with the sdist URL
# (https://files.pythonhosted.org/packages/source/t/tokenizers/...), so only
# the crate tarballs computed by cargo.eclass from CRATES are appended here.
SRC_URI+=" ${CARGO_CRATE_URIS}"

# License of the package itself.
LICENSE="Apache-2.0"
# Dependent crate licenses
# NOTE(review): this list may be incomplete for the crates in CRATES
# (e.g. numpy and target-lexicon look like they carry other licenses) --
# regenerate with the crate-listing tool and confirm.
LICENSE+=" Apache-2.0 MIT Unicode-3.0"
SLOT="0"
KEYWORDS="~amd64"

# huggingface-hub is a Python runtime dependency only. It is deliberately
# not repeated in DEPEND: nothing visible in this ebuild compiles or links
# against it, and the eclasses already contribute their own build-time
# dependencies.
RDEPEND="
	>=dev-python/huggingface-hub-0.16.4[${PYTHON_USEDEP}]
"

# Skip the "flags ignored" QA check for the compiled extension module(s)
# installed under site-packages/tokenizers (built via cargo).
QA_FLAGS_IGNORED="usr/lib.*/py.*/site-packages/tokenizers/.*\.so"