# automatically generated by g-sorcery
# please do not edit this file

EAPI=8

# Python targets this ebuild declares: python3_11 through python3_14.
PYTHON_COMPAT=( python3_{11..14} )
# Build-backend selector.
# NOTE(review): presumably read by distutils-r1 through one of the eclasses
# inherited further down -- confirm against the gs-pypi eclass.
DISTUTILS_USE_PEP517=standalone

# Upstream naming, taken unchanged from the package name and version.
REALNAME=${PN}
LITERALNAME=${PN}
REALVERSION=${PV}
# NOTE(review): looks like a g-sorcery/gs-pypi switch related to source
# digests -- confirm its exact meaning in the eclass.
DIGEST_SOURCES=yes

# Load the python-r1 and gs-pypi eclasses.
# NOTE(review): the variables assigned before this line (PYTHON_COMPAT,
# DISTUTILS_USE_PEP517, REALNAME, ...) are presumably consumed by these
# eclasses at inherit time, so they must stay ahead of it -- confirm against
# the eclass sources.
inherit python-r1 gs-pypi

DESCRIPTION="A library that prepares raw documents for downstream ML tasks."
HOMEPAGE="https://github.com/Unstructured-IO/unstructured"

# Name of the source tarball, then its download URL. The URL path is keyed by
# the first character of the project name followed by the full project name.
SOURCEFILE="${REALNAME}-${REALVERSION}.tar.gz"
SRC_URI="https://files.pythonhosted.org/packages/source/${REALNAME:0:1}/${REALNAME}/${SOURCEFILE}"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~amd64 ~x86"
# The test phase is restricted for this package.
RESTRICT="test"

# Optional feature flags; each one gates the conditional entries added to
# DEPENDENCIES below.
# NOTE(review): "doc" here gates dev-python/python-docx, not documentation
# installation -- verify this does not clash with the conventional meaning of
# a USE flag called "doc".
IUSE="all-docs csv doc docx epub image"

# Packages required regardless of USE flags.
DEPENDENCIES="dev-python/charset-normalizer[${PYTHON_USEDEP}]
	dev-python/filetype[${PYTHON_USEDEP}]
	dev-python/python-magic[${PYTHON_USEDEP}]
	dev-python/lxml[${PYTHON_USEDEP}]
	dev-python/nltk[${PYTHON_USEDEP}]
	dev-python/requests[${PYTHON_USEDEP}]
	dev-python/beautifulsoup4[${PYTHON_USEDEP}]
	dev-python/emoji[${PYTHON_USEDEP}]
	dev-python/dataclasses-json[${PYTHON_USEDEP}]
	dev-python/python-iso639[${PYTHON_USEDEP}]
	dev-python/langdetect[${PYTHON_USEDEP}]
	dev-python/numpy[${PYTHON_USEDEP}]
	dev-python/rapidfuzz[${PYTHON_USEDEP}]
	dev-python/backoff[${PYTHON_USEDEP}]
	dev-python/typing-extensions[${PYTHON_USEDEP}]
	dev-python/unstructured-client[${PYTHON_USEDEP}]
	dev-python/wrapt[${PYTHON_USEDEP}]
	dev-python/tqdm[${PYTHON_USEDEP}]
	dev-python/psutil[${PYTHON_USEDEP}]
	dev-python/python-oxmsg[${PYTHON_USEDEP}]
	dev-python/html5lib[${PYTHON_USEDEP}]
	dev-python/numba[${PYTHON_USEDEP}]"

# Extra packages pulled in by USE=all-docs. Each appended segment starts with
# a newline so the final value matches a single multi-line string.
DEPENDENCIES+="
	all-docs? ( dev-python/pandas[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/msoffcrypto-tool[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/pi-heif[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/google-cloud-vision[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/unstructured-pytesseract[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/pdfminer-six[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/pikepdf[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/markdown[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/pypandoc[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/onnx[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/pdf2image[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/xlrd[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/unstructured-inference[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/python-pptx[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/openpyxl[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/effdet[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/onnxruntime[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/python-docx[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/networkx[${PYTHON_USEDEP}] )
	all-docs? ( dev-python/pypdf[${PYTHON_USEDEP}] )"

# Extra packages for the individual format flags (csv, doc, docx, epub, image).
DEPENDENCIES+="
	csv? ( dev-python/pandas[${PYTHON_USEDEP}] )
	doc? ( dev-python/python-docx[${PYTHON_USEDEP}] )
	docx? ( dev-python/python-docx[${PYTHON_USEDEP}] )
	epub? ( dev-python/pypandoc[${PYTHON_USEDEP}] )
	image? ( dev-python/onnx[${PYTHON_USEDEP}] )
	image? ( dev-python/onnxruntime[${PYTHON_USEDEP}] )
	image? ( dev-python/pdf2image[${PYTHON_USEDEP}] )
	image? ( dev-python/pdfminer-six[${PYTHON_USEDEP}] )"

# The same list is used for both the run-time and the build-time dependency set.
RDEPEND="${DEPENDENCIES}"
BDEPEND="${DEPENDENCIES}"