Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pillow-heif license > change to pi-heif #2665

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 15 additions & 10 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ charset-normalizer==3.3.2
click==8.1.7
# via nltk
dataclasses-json==0.6.4
# via -r base.in
dataclasses-json-speakeasy==0.5.11
# via
# -r base.in
# unstructured-client
deepdiff==6.7.1
# via unstructured-client
emoji==2.10.1
# via -r base.in
Expand All @@ -41,10 +43,9 @@ langdetect==1.0.9
# via -r base.in
lxml==5.1.0
# via -r base.in
marshmallow==3.20.2
marshmallow==3.21.1
# via
# dataclasses-json
# dataclasses-json-speakeasy
# unstructured-client
mypy-extensions==1.0.0
# via
Expand All @@ -54,17 +55,21 @@ nltk==3.8.1
# via -r base.in
numpy==1.26.4
# via -r base.in
packaging==23.2
ordered-set==4.1.0
# via deepdiff
packaging==24.0
# via
# marshmallow
# unstructured-client
python-dateutil==2.8.2
pypdf==4.1.0
# via unstructured-client
python-dateutil==2.9.0.post0
# via unstructured-client
python-iso639==2024.2.7
# via -r base.in
python-magic==0.4.27
# via -r base.in
rapidfuzz==3.6.1
rapidfuzz==3.6.2
# via -r base.in
regex==2023.12.25
# via nltk
Expand All @@ -83,17 +88,17 @@ tabulate==0.9.0
# via -r base.in
tqdm==4.66.2
# via nltk
typing-extensions==4.9.0
typing-extensions==4.10.0
# via
# -r base.in
# pypdf
# typing-inspect
# unstructured-client
typing-inspect==0.9.0
# via
# dataclasses-json
# dataclasses-json-speakeasy
# unstructured-client
unstructured-client==0.18.0
unstructured-client==0.22.0
# via -r base.in
urllib3==1.26.18
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-pdf-image.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ onnx
pdf2image
pdfminer.six
pikepdf
pillow_heif
pi_heif
pypdf
# Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
# when unstructured library is.
Expand Down
83 changes: 59 additions & 24 deletions requirements/extra-pdf-image.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ coloredlogs==15.0.1
# via onnxruntime
contourpy==1.2.0
# via matplotlib
cryptography==42.0.2
cryptography==42.0.5
# via pdfminer-six
cycler==0.12.1
# via matplotlib
Expand All @@ -35,15 +35,16 @@ filelock==3.13.1
# huggingface-hub
# torch
# transformers
flatbuffers==23.5.26
# triton
flatbuffers==24.3.7
# via onnxruntime
fonttools==4.49.0
fonttools==4.50.0
# via matplotlib
fsspec==2024.2.0
fsspec==2024.3.0
# via
# huggingface-hub
# torch
huggingface-hub==0.20.3
huggingface-hub==0.21.4
# via
# timm
# tokenizers
Expand All @@ -55,7 +56,7 @@ idna==3.6
# via
# -c base.txt
# requests
importlib-resources==6.1.1
importlib-resources==6.3.1
# via matplotlib
iopath==0.1.10
# via layoutparser
Expand Down Expand Up @@ -93,6 +94,37 @@ numpy==1.26.4
# scipy
# torchvision
# transformers
nvidia-cublas-cu12==12.1.3.1
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.1.105
# via torch
nvidia-cuda-nvrtc-cu12==12.1.105
# via torch
nvidia-cuda-runtime-cu12==12.1.105
# via torch
nvidia-cudnn-cu12==8.9.2.26
# via torch
nvidia-cufft-cu12==11.0.2.54
# via torch
nvidia-curand-cu12==10.3.2.106
# via torch
nvidia-cusolver-cu12==11.4.5.107
# via torch
nvidia-cusparse-cu12==12.1.0.106
# via
# nvidia-cusolver-cu12
# torch
nvidia-nccl-cu12==2.19.3
# via torch
nvidia-nvjitlink-cu12==12.4.99
# via
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
nvidia-nvtx-cu12==12.1.105
# via torch
omegaconf==2.3.0
# via effdet
onnx==1.15.0
Expand All @@ -108,7 +140,7 @@ opencv-python==4.8.0.76
# -c constraints.in
# layoutparser
# unstructured-inference
packaging==23.2
packaging==24.0
# via
# -c base.txt
# huggingface-hub
Expand All @@ -118,18 +150,20 @@ packaging==23.2
# pytesseract
# transformers
# unstructured-pytesseract
pandas==2.2.0
pandas==2.2.1
# via layoutparser
pdf2image==1.17.0
# via
# -r extra-pdf-image.in
# layoutparser
pdfminer-six==20221105
pdfminer-six==20231228
# via
# -r extra-pdf-image.in
# pdfplumber
pdfplumber==0.10.4
pdfplumber==0.11.0
# via layoutparser
pi-heif==0.15.0
# via -r extra-pdf-image.in
pikepdf==8.11.0
# via -r extra-pdf-image.in
pillow==10.2.0
Expand All @@ -138,13 +172,11 @@ pillow==10.2.0
# matplotlib
# pdf2image
# pdfplumber
# pi-heif
# pikepdf
# pillow-heif
# pytesseract
# torchvision
# unstructured-pytesseract
pillow-heif==0.15.0
# via -r extra-pdf-image.in
portalocker==2.8.2
# via iopath
protobuf==4.23.4
Expand All @@ -162,13 +194,15 @@ pyparsing==3.0.9
# via
# -c constraints.in
# matplotlib
pypdf==4.0.1
# via -r extra-pdf-image.in
pypdfium2==4.27.0
pypdf==4.1.0
# via
# -c base.txt
# -r extra-pdf-image.in
pypdfium2==4.28.0
# via pdfplumber
pytesseract==0.3.10
# via layoutparser
python-dateutil==2.8.2
python-dateutil==2.9.0.post0
# via
# -c base.txt
# matplotlib
Expand All @@ -184,7 +218,7 @@ pyyaml==6.0.1
# omegaconf
# timm
# transformers
rapidfuzz==3.6.1
rapidfuzz==3.6.2
# via
# -c base.txt
# unstructured-inference
Expand All @@ -196,7 +230,6 @@ requests==2.31.0
# via
# -c base.txt
# huggingface-hub
# torchvision
# transformers
safetensors==0.3.2
# via
Expand All @@ -215,18 +248,18 @@ sympy==1.12
# via
# onnxruntime
# torch
timm==0.9.12
timm==0.9.16
# via effdet
tokenizers==0.15.2
# via transformers
torch==2.2.0
torch==2.2.1
# via
# -c constraints.in
# effdet
# layoutparser
# timm
# torchvision
torchvision==0.17.0
torchvision==0.17.1
# via
# effdet
# layoutparser
Expand All @@ -239,7 +272,9 @@ tqdm==4.66.2
# transformers
transformers==4.37.1
# via unstructured-inference
typing-extensions==4.9.0
triton==2.2.0
# via torch
typing-extensions==4.10.0
# via
# -c base.txt
# huggingface-hub
Expand All @@ -263,5 +298,5 @@ wrapt==1.16.0
# via
# -c base.txt
# deprecated
zipp==3.17.0
zipp==3.18.1
# via importlib-resources
2 changes: 1 addition & 1 deletion unstructured/partition/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from pdfminer.pdftypes import PDFObjRef
from pdfminer.utils import open_filename
from PIL import Image as PILImage
from pillow_heif import register_heif_opener
from pi_heif import register_heif_opener

from unstructured.chunking import add_chunking_strategy
from unstructured.cleaners.core import (
Expand Down