Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

infer_table_structure in partition_pdf function causes CUDA RuntimeError #2922

Open
naity2 opened this issue Apr 22, 2024 · 0 comments
Open
Labels
bug Something isn't working pdf

Comments

@naity2
Copy link

naity2 commented Apr 22, 2024

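Calling partition_pdf with infer_table_structure=True raises a CUDA RuntimeError (CUBLAS_STATUS_NOT_INITIALIZED) while the table-structure model runs; the full traceback is below. I would greatly appreciate any help in resolving this issue!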

Code:

from unstructured.partition.pdf import partition_pdf

raw_pdf_elements = partition_pdf(
    filename="1_Presentation.pdf",
    extract_images_in_pdf=True,
    infer_table_structure=True,
)

Error:

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/documents/elements.py:539, in process_metadata.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    537 @functools.wraps(func)
    538 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
--> 539     elements = func(*args, **kwargs)
    540     sig = inspect.signature(func)
    541     params: dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:622, in add_filetype.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    620 @functools.wraps(func)
    621 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 622     elements = func(*args, **kwargs)
    623     sig = inspect.signature(func)
    624     params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:582, in add_metadata.<locals>.wrapper(*args, **kwargs)
    580 @functools.wraps(func)
    581 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 582     elements = func(*args, **kwargs)
    583     sig = inspect.signature(func)
    584     params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/chunking/dispatch.py:83, in add_chunking_strategy.<locals>.wrapper(*args, **kwargs)
     80     return call_args
     82 # -- call the partitioning function to get the elements --
---> 83 elements = func(*args, **kwargs)
     85 # -- look for a chunking-strategy argument --
     86 call_args = get_call_args_applying_defaults()

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:217, in partition_pdf(filename, file, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, include_metadata, metadata_filename, metadata_last_modified, chunking_strategy, links, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, **kwargs)
    213 exactly_one(filename=filename, file=file)
    215 languages = check_language_args(languages or [], ocr_languages) or ["eng"]
--> 217 return partition_pdf_or_image(
    218     filename=filename,
    219     file=file,
    220     include_page_breaks=include_page_breaks,
    221     strategy=strategy,
    222     infer_table_structure=infer_table_structure,
    223     languages=languages,
    224     metadata_last_modified=metadata_last_modified,
    225     hi_res_model_name=hi_res_model_name,
    226     extract_images_in_pdf=extract_images_in_pdf,
    227     extract_image_block_types=extract_image_block_types,
    228     extract_image_block_output_dir=extract_image_block_output_dir,
    229     extract_image_block_to_payload=extract_image_block_to_payload,
    230     date_from_file_object=date_from_file_object,
    231     **kwargs,
    232 )

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:305, in partition_pdf_or_image(filename, file, is_image, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, metadata_last_modified, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, **kwargs)
    303     with warnings.catch_warnings():
    304         warnings.simplefilter("ignore")
--> 305         elements = _partition_pdf_or_image_local(
    306             filename=filename,
    307             file=spooled_to_bytes_io_if_needed(file),
    308             is_image=is_image,
    309             infer_table_structure=infer_table_structure,
    310             include_page_breaks=include_page_breaks,
    311             languages=languages,
    312             metadata_last_modified=metadata_last_modified or last_modification_date,
    313             hi_res_model_name=hi_res_model_name,
    314             pdf_text_extractable=pdf_text_extractable,
    315             extract_images_in_pdf=extract_images_in_pdf,
    316             extract_image_block_types=extract_image_block_types,
    317             extract_image_block_output_dir=extract_image_block_output_dir,
    318             extract_image_block_to_payload=extract_image_block_to_payload,
    319             **kwargs,
    320         )
    321         out_elements = _process_uncategorized_text_elements(elements)
    323 elif strategy == PartitionStrategy.FAST:

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    211 if len(missing_deps) > 0:
    212     raise ImportError(
    213         f"Following dependencies are missing: {', '.join(missing_deps)}. "
    214         + (
   (...)
    218         ),
    219     )
--> 220 return func(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:464, in _partition_pdf_or_image_local(filename, file, is_image, infer_table_structure, include_page_breaks, languages, ocr_mode, model_name, hi_res_model_name, pdf_image_dpi, metadata_last_modified, pdf_text_extractable, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, analysis, analyzed_image_output_dir_path, **kwargs)
    458         # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
    459         merged_document_layout = merge_inferred_with_extracted_layout(
    460             inferred_document_layout=inferred_document_layout,
    461             extracted_layout=extracted_layout,
    462         )
--> 464         final_document_layout = process_file_with_ocr(
    465             filename,
    466             merged_document_layout,
    467             extracted_layout=extracted_layout,
    468             is_image=is_image,
    469             infer_table_structure=infer_table_structure,
    470             ocr_languages=ocr_languages,
    471             ocr_mode=ocr_mode,
    472             pdf_image_dpi=pdf_image_dpi,
    473         )
    474 else:
    475     inferred_document_layout = process_data_with_model(
    476         file,
    477         is_image=is_image,
    478         model_name=hi_res_model_name,
    479         pdf_image_dpi=pdf_image_dpi,
    480     )

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    211 if len(missing_deps) > 0:
    212     raise ImportError(
    213         f"Following dependencies are missing: {', '.join(missing_deps)}. "
    214         + (
   (...)
    218         ),
    219     )
--> 220 return func(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:177, in process_file_with_ocr(filename, out_layout, extracted_layout, is_image, infer_table_structure, ocr_languages, ocr_mode, pdf_image_dpi)
    175 except Exception as e:
    176     if os.path.isdir(filename) or os.path.isfile(filename):
--> 177         raise e
    178     else:
    179         raise FileNotFoundError(f'File "{filename}" not found!') from e

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:165, in process_file_with_ocr(filename, out_layout, extracted_layout, is_image, infer_table_structure, ocr_languages, ocr_mode, pdf_image_dpi)
    163     extracted_regions = extracted_layout[i] if i < len(extracted_layout) else None
    164     with PILImage.open(image_path) as image:
--> 165         merged_page_layout = supplement_page_layout_with_ocr(
    166             page_layout=out_layout.pages[i],
    167             image=image,
    168             infer_table_structure=infer_table_structure,
    169             ocr_languages=ocr_languages,
    170             ocr_mode=ocr_mode,
    171             extracted_regions=extracted_regions,
    172         )
    173         merged_page_layouts.append(merged_page_layout)
    174 return DocumentLayout.from_pages(merged_page_layouts)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    211 if len(missing_deps) > 0:
    212     raise ImportError(
    213         f"Following dependencies are missing: {', '.join(missing_deps)}. "
    214         + (
   (...)
    218         ),
    219     )
--> 220 return func(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:243, in supplement_page_layout_with_ocr(page_layout, image, infer_table_structure, ocr_languages, ocr_mode, extracted_regions)
    240     if tables.tables_agent is None:
    241         raise RuntimeError("Unable to load table extraction agent.")
--> 243     page_layout.elements[:] = supplement_element_with_table_extraction(
    244         elements=cast(List["LayoutElement"], page_layout.elements),
    245         image=image,
    246         tables_agent=tables.tables_agent,
    247         ocr_languages=ocr_languages,
    248         ocr_agent=ocr_agent,
    249         extracted_regions=extracted_regions,
    250     )
    252 return page_layout

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:287, in supplement_element_with_table_extraction(elements, image, tables_agent, ocr_languages, ocr_agent, extracted_regions)
    272     cropped_image = image.crop(
    273         (
    274             padded_element.bbox.x1,
   (...)
    278         ),
    279     )
    280     table_tokens = get_table_tokens(
    281         table_element_image=cropped_image,
    282         ocr_languages=ocr_languages,
   (...)
    285         table_element=padded_element,
    286     )
--> 287     element.text_as_html = tables_agent.predict(cropped_image, ocr_tokens=table_tokens)
    288 return elements

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:47, in UnstructuredTableTransformerModel.predict(self, x, ocr_tokens)
     31 """Predict table structure deferring to run_prediction with ocr tokens
     32 
     33 Note:
   (...)
     44 FIXME: refactor token data into a dataclass so we have clear expectations of the fields
     45 """
     46 super().predict(x)
---> 47 return self.run_prediction(x, ocr_tokens=ocr_tokens)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:96, in UnstructuredTableTransformerModel.run_prediction(self, x, pad_for_structure_detection, ocr_tokens, result_format)
     88 def run_prediction(
     89     self,
     90     x: Image,
   (...)
     93     result_format: Optional[str] = "html",
     94 ):
     95     """Predict table structure"""
---> 96     outputs_structure = self.get_structure(x, pad_for_structure_detection)
     97     if ocr_tokens is None:
     98         raise ValueError("Cannot predict table structure with no OCR tokens")

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:84, in UnstructuredTableTransformerModel.get_structure(self, x, pad_for_structure_detection)
     79 logger.info(f"padding image by {pad_for_structure_detection} for structure detection")
     80 encoding = self.feature_extractor(
     81     pad_image_with_background_color(x, pad_for_structure_detection),
     82     return_tensors="pt",
     83 ).to(self.device)
---> 84 outputs_structure = self.model(**encoding)
     85 outputs_structure["pad_for_structure_detection"] = pad_for_structure_detection
     86 return outputs_structure

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:1456, in TableTransformerForObjectDetection.forward(self, pixel_values, pixel_mask, decoder_attention_mask, encoder_outputs, inputs_embeds, decoder_inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
   1453 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1455 # First, sent images through TABLE_TRANSFORMER base model to obtain encoder + decoder outputs
-> 1456 outputs = self.model(
   1457     pixel_values,
   1458     pixel_mask=pixel_mask,
   1459     decoder_attention_mask=decoder_attention_mask,
   1460     encoder_outputs=encoder_outputs,
   1461     inputs_embeds=inputs_embeds,
   1462     decoder_inputs_embeds=decoder_inputs_embeds,
   1463     output_attentions=output_attentions,
   1464     output_hidden_states=output_hidden_states,
   1465     return_dict=return_dict,
   1466 )
   1468 sequence_output = outputs[0]
   1470 # class logits + predicted bounding boxes

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:1316, in TableTransformerModel.forward(self, pixel_values, pixel_mask, decoder_attention_mask, encoder_outputs, inputs_embeds, decoder_inputs_embeds, output_attentions, output_hidden_states, return_dict)
   1312 # Fourth, sent flattened_features + flattened_mask + object queries through encoder
   1313 # flattened_features is a Tensor of shape (batch_size, heigth*width, hidden_size)
   1314 # flattened_mask is a Tensor of shape (batch_size, heigth*width)
   1315 if encoder_outputs is None:
-> 1316     encoder_outputs = self.encoder(
   1317         inputs_embeds=flattened_features,
   1318         attention_mask=flattened_mask,
   1319         object_queries=object_queries,
   1320         output_attentions=output_attentions,
   1321         output_hidden_states=output_hidden_states,
   1322         return_dict=return_dict,
   1323     )
   1324 # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True
   1325 elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:977, in TableTransformerEncoder.forward(self, inputs_embeds, attention_mask, object_queries, output_attentions, output_hidden_states, return_dict)
    974     layer_outputs = (None, None)
    975 else:
    976     # we add object_queries as extra input to the encoder_layer
--> 977     layer_outputs = encoder_layer(
    978         hidden_states,
    979         attention_mask,
    980         object_queries=object_queries,
    981         output_attentions=output_attentions,
    982     )
    984     hidden_states = layer_outputs[0]
    986 if output_attentions:

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:643, in TableTransformerEncoderLayer.forward(self, hidden_states, attention_mask, object_queries, output_attentions)
    640 residual = hidden_states
    641 hidden_states = self.self_attn_layer_norm(hidden_states)
--> 643 hidden_states, attn_weights = self.self_attn(
    644     hidden_states=hidden_states,
    645     attention_mask=attention_mask,
    646     object_queries=object_queries,
    647     output_attentions=output_attentions,
    648 )
    650 hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
    651 hidden_states = residual + hidden_states

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:538, in TableTransformerAttention.forward(self, hidden_states, attention_mask, object_queries, key_value_states, spatial_position_embeddings, output_attentions, **kwargs)
    535     key_value_states = self.with_pos_embed(key_value_states, spatial_position_embeddings)
    537 # get query proj
--> 538 query_states = self.q_proj(hidden_states) * self.scaling
    539 # get key, value proj
    540 if is_cross_attention:
    541     # cross_attentions

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/linear.py:116, in Linear.forward(self, input)
    115 def forward(self, input: Tensor) -> Tensor:
--> 116     return F.linear(input, self.weight, self.bias)

RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
@scanny scanny added bug Something isn't working pdf labels Apr 23, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working pdf
Projects
None yet
Development

No branches or pull requests

2 participants