# Copyright 2023-2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

# Autogenerated by pycargoebuild 0.13.3

# Ebuild API level this file is written against.
EAPI=8

# Settings for the Python eclasses (distutils-r1 is inherited further down):
# - build via the maturin PEP 517 backend,
# - accept Python 3.10 through 3.13 (brace expansion of python3_{10..13}),
# - the package builds a native extension (DISTUTILS_EXT),
# - build against a single selected Python implementation
#   (DISTUTILS_SINGLE_IMPL; matches the python-single-r1 calls used below).
DISTUTILS_USE_PEP517=maturin
PYTHON_COMPAT=( python3_{10..13} )
DISTUTILS_EXT=1
DISTUTILS_SINGLE_IMPL=1

CRATES="
	addr2line@0.24.2
	adler2@2.0.0
	aho-corasick@1.1.3
	anes@0.1.4
	anstream@0.6.18
	anstyle@1.0.10
	anstyle-parse@0.2.6
	anstyle-query@1.1.2
	anstyle-wincon@3.0.6
	assert_approx_eq@1.1.0
	autocfg@1.4.0
	backtrace@0.3.74
	base64@0.13.1
	base64@0.21.7
	base64@0.22.1
	bitflags@1.3.2
	bitflags@2.4.0
	bitflags@2.6.0
	bit-set@0.8.0
	bit-vec@0.8.0
	bumpalo@3.16.0
	byteorder@1.5.0
	bytes@1.10.1
	cast@0.3.0
	cc@1.2.6
	cc@1.2.8
	cfg_aliases@0.2.1
	cfg-if@1.0.0
	ciborium@0.2.2
	ciborium-io@0.2.2
	ciborium-ll@0.2.2
	clap@4.5.35
	clap_builder@4.5.35
	clap_lex@0.7.4
	colorchoice@1.0.3
	console@0.15.10
	crc32fast@1.4.2
	criterion@0.5.1
	criterion-plot@0.5.0
	crossbeam-deque@0.8.6
	crossbeam-epoch@0.9.18
	crossbeam-utils@0.8.21
	crunchy@0.2.3
	darling@0.20.10
	darling_core@0.20.10
	darling_macro@0.20.10
	derive_builder@0.20.2
	derive_builder_core@0.20.2
	derive_builder_macro@0.20.2
	dirs@5.0.1
	dirs-sys@0.4.1
	displaydoc@0.2.5
	either@1.13.0
	encode_unicode@1.0.0
	env_filter@0.1.3
	env_logger@0.11.6
	errno@0.3.10
	esaxx-rs@0.1.10
	fancy-regex@0.14.0
	fastrand@2.3.0
	flate2@1.1.1
	fnv@1.0.7
	form_urlencoded@1.2.1
	futures-channel@0.3.31
	futures-core@0.3.31
	futures-io@0.3.31
	futures-macro@0.3.31
	futures-sink@0.3.31
	futures-task@0.3.31
	futures-util@0.3.31
	getrandom@0.2.15
	getrandom@0.3.0
	gimli@0.31.1
	half@2.5.0
	heck@0.5.0
	hermit-abi@0.5.0
	hf-hub@0.4.2
	http@1.3.1
	http-body@1.0.1
	http-body-util@0.1.3
	httparse@1.10.1
	humantime@2.1.0
	hyper@1.6.0
	hyper-rustls@0.27.5
	hyper-util@0.1.11
	icu_collections@1.5.0
	icu_locid@1.5.0
	icu_locid_transform@1.5.0
	icu_locid_transform_data@1.5.1
	icu_normalizer@1.5.0
	icu_normalizer_data@1.5.1
	icu_properties@1.5.1
	icu_properties_data@1.5.1
	icu_provider@1.5.0
	icu_provider_macros@1.5.0
	ident_case@1.0.1
	idna@1.0.3
	idna_adapter@1.2.0
	indicatif@0.17.9
	indoc@2.0.5
	ipnet@2.11.0
	is-terminal@0.4.16
	is_terminal_polyfill@1.70.1
	itertools@0.10.5
	itertools@0.11.0
	itertools@0.12.1
	itertools@0.13.0
	itoa@1.0.14
	js-sys@0.3.76
	js-sys@0.3.77
	lazy_static@1.5.0
	libc@0.2.169
	libc@0.2.171
	libredox@0.1.3
	linux-raw-sys@0.4.14
	litemap@0.7.5
	log@0.4.22
	macro_rules_attribute@0.2.0
	macro_rules_attribute-proc_macro@0.2.0
	matrixmultiply@0.3.9
	memchr@2.7.4
	memoffset@0.9.1
	mime@0.3.17
	minimal-lexical@0.2.1
	miniz_oxide@0.8.7
	mio@1.0.3
	monostate@0.1.13
	monostate-impl@0.1.13
	ndarray@0.16.1
	nom@7.1.3
	nu-ansi-term@0.46.0
	number_prefix@0.4.0
	num-complex@0.4.6
	num-integer@0.1.46
	numpy@0.23.0
	num-traits@0.2.19
	object@0.36.7
	once_cell@1.20.2
	onig@6.4.0
	onig_sys@69.8.1
	oorandom@11.1.5
	option-ext@0.2.0
	overload@0.1.1
	paste@1.0.15
	percent-encoding@2.3.1
	pin-project-lite@0.2.16
	pin-utils@0.1.0
	pkg-config@0.3.31
	plotters@0.3.7
	plotters-backend@0.3.7
	plotters-svg@0.3.7
	portable-atomic@1.10.0
	portable-atomic-util@0.2.4
	ppv-lite86@0.2.20
	proc-macro2@1.0.92
	pyo3@0.23.5
	pyo3-build-config@0.23.5
	pyo3-ffi@0.23.5
	pyo3-macros@0.23.5
	pyo3-macros-backend@0.23.5
	quinn@0.11.7
	quinn-proto@0.11.10
	quinn-udp@0.5.11
	quote@1.0.38
	rand@0.8.5
	rand@0.9.0
	rand_chacha@0.3.1
	rand_chacha@0.9.0
	rand_core@0.6.4
	rand_core@0.9.0
	rawpointer@0.2.1
	rayon@1.10.0
	rayon-cond@0.3.0
	rayon-core@1.12.1
	redox_users@0.4.6
	regex@1.11.1
	regex-automata@0.4.9
	regex-syntax@0.8.5
	reqwest@0.12.15
	ring@0.17.14
	rustc-demangle@0.1.24
	rustc-hash@2.1.0
	rustix@0.38.42
	rustls@0.21.12
	rustls@0.23.25
	rustls-pemfile@2.2.0
	rustls-pki-types@1.11.0
	rustls-webpki@0.101.7
	rustls-webpki@0.103.1
	rustversion@1.0.20
	ryu@1.0.18
	same-file@1.0.6
	sct@0.7.1
	serde@1.0.217
	serde_derive@1.0.217
	serde_json@1.0.134
	serde_urlencoded@0.7.1
	sharded-slab@0.1.7
	shlex@1.3.0
	slab@0.4.9
	smallvec@1.13.2
	socket2@0.5.9
	socks@0.3.4
	spm_precompiled@0.1.4
	stable_deref_trait@1.2.0
	strsim@0.11.1
	subtle@2.6.1
	syn@2.0.93
	synstructure@0.13.1
	sync_wrapper@1.0.2
	target-lexicon@0.12.16
	tempfile@3.14.0
	thiserror@1.0.69
	thiserror@2.0.9
	thiserror-impl@1.0.69
	thiserror-impl@2.0.9
	thread_local@1.1.8
	tinytemplate@1.2.1
	tinyvec@1.9.0
	tinyvec_macros@0.1.1
	tokio@1.44.1
	tokio-rustls@0.26.2
	tokio-util@0.7.14
	tower@0.5.2
	tower-layer@0.3.3
	tower-service@0.3.3
	tracing@0.1.41
	tracing-attributes@0.1.28
	tracing-core@0.1.33
	tracing-log@0.2.0
	tracing-subscriber@0.3.19
	try-lock@0.2.5
	tinystr@0.7.5
	unicode_categories@0.1.1
	unicode-ident@1.0.14
	unicode-normalization-alignments@0.1.12
	unicode-segmentation@1.12.0
	unicode-width@0.2.0
	unindent@0.2.3
	untrusted@0.9.0
	ureq@2.8.0
	url@2.5.4
	utf16_iter@1.0.5
	utf8_iter@1.0.4
	utf8parse@0.2.2
	valuable@0.1.1
	walkdir@2.5.0
	want@0.3.1
	wasi@0.11.0+wasi-snapshot-preview1
	wasi@0.13.3+wasi-0.2.2
	wasm-bindgen@0.2.99
	wasm-bindgen@0.2.100
	wasm-bindgen-backend@0.2.99
	wasm-bindgen-backend@0.2.100
	wasm-bindgen-futures@0.4.50
	wasm-bindgen-macro@0.2.99
	wasm-bindgen-macro@0.2.100
	wasm-bindgen-macro-support@0.2.99
	wasm-bindgen-macro-support@0.2.100
	wasm-bindgen-shared@0.2.99
	wasm-bindgen-shared@0.2.100
	wasm-streams@0.4.2
	webpki-roots@0.25.4
	webpki-roots@0.26.8
	web-sys@0.3.77
	web-time@1.1.0
	winapi@0.3.9
	winapi-i686-pc-windows-gnu@0.4.0
	winapi-util@0.1.9
	winapi-x86_64-pc-windows-gnu@0.4.0
	windows_aarch64_gnullvm@0.48.0
	windows_aarch64_gnullvm@0.52.6
	windows_aarch64_gnullvm@0.53.0
	windows_aarch64_msvc@0.48.0
	windows_aarch64_msvc@0.52.6
	windows_aarch64_msvc@0.53.0
	windows_i686_gnu@0.48.0
	windows_i686_gnu@0.52.6
	windows_i686_gnu@0.53.0
	windows_i686_gnullvm@0.52.6
	windows_i686_gnullvm@0.53.0
	windows_i686_msvc@0.48.0
	windows_i686_msvc@0.52.6
	windows_i686_msvc@0.53.0
	windows-link@0.1.1
	windows-registry@0.4.0
	windows-result@0.3.2
	windows-strings@0.3.0
	windows-sys@0.48.0
	windows-sys@0.52.0
	windows-sys@0.59.0
	windows-targets@0.48.0
	windows-targets@0.52.6
	windows-targets@0.53.0
	windows_x86_64_gnu@0.48.0
	windows_x86_64_gnu@0.52.6
	windows_x86_64_gnu@0.53.0
	windows_x86_64_gnullvm@0.48.0
	windows_x86_64_gnullvm@0.52.6
	windows_x86_64_gnullvm@0.53.0
	windows_x86_64_msvc@0.48.0
	windows_x86_64_msvc@0.52.6
	windows_x86_64_msvc@0.53.0
	wit-bindgen-rt@0.33.0
	write16@1.0.0
	writeable@0.5.5
	yoke@0.7.5
	yoke-derive@0.7.5
	zerocopy@0.7.35
	zerocopy@0.8.24
	zerocopy-derive@0.7.35
	zerocopy-derive@0.8.24
	zerofrom@0.1.6
	zerofrom-derive@0.1.6
	zeroize@1.8.1
	zerovec@0.10.2
	zerovec-derive@0.10.2
"

inherit cargo distutils-r1

# Basic package metadata.
DESCRIPTION="Implementation of today's most used tokenizers"
HOMEPAGE="https://github.com/huggingface/tokenizers"
# Upstream release tarball from the GitHub tag v${PV}, saved locally as
# ${P}.gh.tar.gz, followed by the crate download URIs in
# ${CARGO_CRATE_URIS} (presumably derived by the cargo eclass from the
# CRATES list above).
SRC_URI="
	https://github.com/huggingface/${PN}/archive/refs/tags/v${PV}.tar.gz
	-> ${P}.gh.tar.gz
	${CARGO_CRATE_URIS}
"

# License of the package itself.
LICENSE="Apache-2.0"
# Licenses of the bundled Rust crates, appended to the package license.
LICENSE+="
	Apache-2.0 Apache-2.0-with-LLVM-exceptions BSD-2 BSD ISC MIT MPL-2.0
	Unicode-DFS-2016
"
SLOT="0"
# Only keyworded for amd64, in its testing (~) branch.
KEYWORDS="~amd64"

# Build-time dependencies:
# - sci-ml/datasets, only when USE=test is enabled, for the selected
#   single Python implementation;
# - dev-python/setuptools-rust, generated per Python implementation through
#   python_gen_cond_dep.
BDEPEND="
	test? ( sci-ml/datasets[${PYTHON_SINGLE_USEDEP}] )
	$(python_gen_cond_dep '
		dev-python/setuptools-rust[${PYTHON_USEDEP}]
	')
"

# Run the Python test suite with pytest.
distutils_enable_tests pytest

# Regular expression covering the shared objects installed under
# site-packages/tokenizers/.
# NOTE(review): presumably exempts the Rust-built extension from the
# "flags ignored" QA check - confirm against the package manager docs.
QA_FLAGS_IGNORED=".*/site-packages/tokenizers/.*so"

# Unpack phase: delegate entirely to the cargo eclass implementation.
src_unpack() {
	cargo_src_unpack
}

# Setup phase: run the setup hooks of both toolchains this package needs,
# first the single-implementation Python one, then the Rust one.
pkg_setup() {
	python-single-r1_pkg_setup
	rust_pkg_setup
}

# Prepare phase: run the default preparation at the top of the source tree,
# then patch and prepare the Python bindings in bindings/python/.
src_prepare() {
	default
	# Abort if the bindings directory is missing; otherwise the patch and
	# the distutils preparation would run against the wrong directory.
	cd bindings/python || die
	# The test patch is shared with the 0.15.2 ebuild, hence the fixed
	# version in the file name.
	eapply "${FILESDIR}"/${PN}-0.15.2-test.patch
	distutils-r1_src_prepare
}

# Configure phase: configure the Rust crate in tokenizers/ first, then the
# Python bindings in bindings/python/.
src_configure() {
	# A failed cd must abort the phase; otherwise the following eclass
	# call would silently run in the wrong directory.
	cd tokenizers || die
	cargo_src_configure
	cd ../bindings/python || die
	distutils-r1_src_configure
}

# Compile phase: build the Rust crate in tokenizers/ first, then the Python
# bindings in bindings/python/.
src_compile() {
	# A failed cd must abort the phase; otherwise the following eclass
	# call would silently run in the wrong directory.
	cd tokenizers || die
	cargo_src_compile
	cd ../bindings/python || die
	distutils-r1_src_compile
}

# Test phase: run only the Python test suite of the bindings.
src_test() {
	# The Rust crate's own tests (cargo_src_test in tokenizers/) are
	# deliberately not run: they do not work.
	cd bindings/python || die
	# Keep pytest out of the benchmark directory. A plain local is enough:
	# the array is read in this shell, and Bash cannot export arrays.
	local EPYTEST_IGNORE=( benches/ )
	distutils-r1_src_test
}

# Install phase: only the Python bindings are installed, so switch straight
# to bindings/python/ and let distutils-r1 handle the installation.
src_install() {
	# Abort rather than install from the wrong directory if the cd fails.
	cd bindings/python || die
	distutils-r1_src_install
}