diff --git a/clarifai_datautils/image/annotation_conversion/loaders.py b/clarifai_datautils/image/annotation_conversion/loaders.py index 038d898..b922b6d 100644 --- a/clarifai_datautils/image/annotation_conversion/loaders.py +++ b/clarifai_datautils/image/annotation_conversion/loaders.py @@ -135,8 +135,11 @@ def __getitem__(self, index: int): ]) concept_ids.append(concept_id) - assert len(concept_ids) == len(annots), f"Num concepts must match num bbox annotations\ - for a single image. Found {len(concept_ids)} concepts and {len(annots)} bboxes." + if len(concept_ids) != len(annots): + raise ValueError( + "Num concepts must match num bbox annotations for a single image." + f" Found {len(concept_ids)} concepts and {len(annots)} bboxes." + ) return VisualDetectionFeatures( image_path, diff --git a/clarifai_datautils/multimodal/pipeline/base.py b/clarifai_datautils/multimodal/pipeline/base.py index 8fa6913..aa764d5 100644 --- a/clarifai_datautils/multimodal/pipeline/base.py +++ b/clarifai_datautils/multimodal/pipeline/base.py @@ -74,7 +74,8 @@ def run(self, # Get files if files is not None: all_files = [files] if isinstance(files, str) else files - assert isinstance(all_files, list), 'Files should be a list of strings.' + if not isinstance(all_files, list): + raise TypeError('Files should be a list of strings.') elif folder is not None: all_files = [os.path.join(folder, f) for f in os.listdir(folder)] diff --git a/clarifai_datautils/multimodal/pipeline/extractors.py b/clarifai_datautils/multimodal/pipeline/extractors.py index d32c1ae..e49758a 100644 --- a/clarifai_datautils/multimodal/pipeline/extractors.py +++ b/clarifai_datautils/multimodal/pipeline/extractors.py @@ -1,3 +1,4 @@ +import logging from typing import List from llama_index.core import Document @@ -17,6 +18,8 @@ from .basetransform import BaseTransform +logger = logging.getLogger(__name__) + class LlamaIndexWrapper(BaseTransform): """ Wrapper class for LlamaIndex Extractor object. 
""" @@ -33,8 +36,8 @@ def __init__(self, llama_extractor, max_nodes=MAX_NODES, skip_nodes=SKIP_NODES): self.max_nodes = max_nodes self.skip_nodes = skip_nodes self.llama_extractor = llama_extractor - assert (self.llama_extractor.llm.to_dict()['class_name'] == 'ClarifaiLLM' - ), "Only Clarifai LLM Models are allowed for extraction." + if self.llama_extractor.llm.to_dict()['class_name'] != 'ClarifaiLLM': + raise ValueError("Only Clarifai LLM Models are allowed for extraction.") self.parser = SentenceSplitter() def __call__(self, elements: List[str]) -> List[str]: @@ -187,8 +190,8 @@ def __call__(self, elements: List[str]) -> List[str]: if element.text: metadata = {self.key: extract_text_after(element.text, self.string)} element.metadata.update(ElementMetadata.from_dict(metadata)) - except Exception: - pass + except Exception as exc: + logger.debug("ExtractTextAfter skipped an element: %s", exc) return elements @@ -221,6 +224,6 @@ def __call__(self, elements: List[str]) -> List[str]: if element.text: metadata = {self.key: extract_text_before(element.text, self.string)} element.metadata.update(ElementMetadata.from_dict(metadata)) - except Exception: - pass + except Exception as exc: + logger.debug("ExtractTextBefore skipped an element: %s", exc) return elements diff --git a/clarifai_datautils/multimodal/pipeline/summarizer.py b/clarifai_datautils/multimodal/pipeline/summarizer.py index d0b85e2..5951e95 100644 --- a/clarifai_datautils/multimodal/pipeline/summarizer.py +++ b/clarifai_datautils/multimodal/pipeline/summarizer.py @@ -1,5 +1,5 @@ import base64 -import random +import secrets from typing import List try: @@ -58,7 +58,7 @@ def __call__(self, elements: List) -> List: if isinstance(element, Image): element.metadata.update( ElementMetadata.from_dict({ - 'input_id': f'{random.randint(1000000, 99999999)}' + 'input_id': str(secrets.randbelow(99000000) + 1000000) })) img_elements.append(element) new_elements = self._summarize_image(img_elements)