# Copyright 2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# Build settings consumed by the eclasses inherited below; they are set
# before the `inherit` line.
DISTUTILS_EXT=1
DISTUTILS_USE_PEP517=setuptools
PYTHON_COMPAT=( python3_{11..14} )

RUST_MIN_VER="1.85.0"
# Rust crates as name@version entries, one per line.
CRATES="
	aho-corasick@1.1.4
	autocfg@1.5.0
	bit-set@0.5.3
	bit-vec@0.6.3
	bstr@1.12.1
	fancy-regex@0.13.0
	heck@0.5.0
	indoc@2.0.7
	libc@0.2.183
	memchr@2.8.0
	memoffset@0.9.1
	once_cell@1.21.4
	portable-atomic@1.13.1
	proc-macro2@1.0.106
	pyo3-build-config@0.26.0
	pyo3-ffi@0.26.0
	pyo3-macros-backend@0.26.0
	pyo3-macros@0.26.0
	pyo3@0.26.0
	quote@1.0.45
	regex-automata@0.4.14
	regex-syntax@0.8.10
	regex@1.12.3
	rustc-hash@2.1.1
	rustversion@1.0.22
	serde@1.0.228
	serde_core@1.0.228
	serde_derive@1.0.228
	syn@2.0.117
	target-lexicon@0.13.5
	unicode-ident@1.0.24
	unindent@0.2.4
"

inherit cargo distutils-r1 optfeature pypi

DESCRIPTION="A fast BPE tokeniser for use with OpenAI's models"
HOMEPAGE="
	https://github.com/openai/tiktoken
	https://pypi.org/project/tiktoken/
"

# Release tag of the pre-fetched encodings tarball. The tarball file name
# uses the tag with its last dot-separated component stripped (${TTE_TAG%.*}).
TTE_TAG=2026.03.26.0
TTE_BASE_URI="https://github.com/falbrechtskirchinger/overlay-assets/releases/download"
# Appended (+=) rather than assigned, so any SRC_URI already set at this
# point is kept.
SRC_URI+="
	${CARGO_CRATE_URIS}
	test? (
		${TTE_BASE_URI}/v${TTE_TAG}/tiktoken-encodings-v${TTE_TAG%.*}.tar.xz
	)
"

# The encodings cache (tiktoken-encodings-*.tar.xz) holds files named after
# the SHA-1 of their URL. It can be generated from the source directory via:
#   grep -Eo 'https://openaipublic.blob[^"]+' tiktoken_ext/openai_public.py | \
#     sort -u | while read u; do h=$(echo -n "$u" | sha1sum | awk '{print $1}'); \
#     wget -O "$h" "$u" ; done

# Include the license file from the source repo:
# https://github.com/openai/tiktoken/issues/92
LICENSE="MIT"
# Dependent crate licenses
LICENSE+=" Apache-2.0-with-LLVM-exceptions MIT Unicode-3.0"
SLOT="0"
KEYWORDS="~amd64 ~arm64"

RDEPEND="
	dev-python/regex[${PYTHON_USEDEP}]
	dev-python/requests[${PYTHON_USEDEP}]
"
BDEPEND="
	test? (
		dev-python/blobfile[${PYTHON_USEDEP}]
	)
"

PATCHES=(
	# test_encoding.py::test_hyp_roundtrip throws ValueError for special tokens
	"${FILESDIR}/tiktoken-0.12.0-special-token-roudtrip.patch"
)

EPYTEST_PLUGINS=( hypothesis pytest-{asyncio,timeout} )
distutils_enable_tests pytest

# Run the test suite via epytest with a pre-populated encodings cache.
python_test() {
	# Search the test install's usr/bin before the rest of PATH.
	local -x PATH=${BUILD_DIR}/install/usr/bin:${PATH}
	# Export the cache location as the tiktoken-encodings directory in
	# WORKDIR (presumably where the test? tarball from SRC_URI unpacks).
	local -x TIKTOKEN_CACHE_DIR="${WORKDIR}/tiktoken-encodings"

	# Remove the in-tree "tiktoken" directory before testing (presumably so
	# the installed package is imported instead of the source tree).
	rm -rf tiktoken || die
	epytest
}

# Advertise dev-python/blobfile as an optional runtime feature after install.
pkg_postinst() {
	optfeature "reading GCS, ABS files" dev-python/blobfile
}