Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions clarifai_datautils/image/annotation_conversion/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,11 @@ def __getitem__(self, index: int):
])
concept_ids.append(concept_id)

assert len(concept_ids) == len(annots), f"Num concepts must match num bbox annotations\
for a single image. Found {len(concept_ids)} concepts and {len(annots)} bboxes."
if len(concept_ids) != len(annots):
raise ValueError(
f"Num concepts must match num bbox annotations for a single image."
f" Found {len(concept_ids)} concepts and {len(annots)} bboxes."
)

return VisualDetectionFeatures(
image_path,
Expand Down
3 changes: 2 additions & 1 deletion clarifai_datautils/multimodal/pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def run(self,
# Get files
if files is not None:
all_files = [files] if isinstance(files, str) else files
assert isinstance(all_files, list), 'Files should be a list of strings.'
if not isinstance(all_files, list):
raise TypeError('Files should be a list of strings.')
elif folder is not None:
all_files = [os.path.join(folder, f) for f in os.listdir(folder)]

Expand Down
15 changes: 9 additions & 6 deletions clarifai_datautils/multimodal/pipeline/extractors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from typing import List

from llama_index.core import Document
Expand All @@ -17,6 +18,8 @@

from .basetransform import BaseTransform

logger = logging.getLogger(__name__)


class LlamaIndexWrapper(BaseTransform):
""" Wrapper class for LlamaIndex Extractor object. """
Expand All @@ -33,8 +36,8 @@ def __init__(self, llama_extractor, max_nodes=MAX_NODES, skip_nodes=SKIP_NODES):
self.max_nodes = max_nodes
self.skip_nodes = skip_nodes
self.llama_extractor = llama_extractor
assert (self.llama_extractor.llm.to_dict()['class_name'] == 'ClarifaiLLM'
), "Only Clarifai LLM Models are allowed for extraction."
if self.llama_extractor.llm.to_dict()['class_name'] != 'ClarifaiLLM':
raise ValueError("Only Clarifai LLM Models are allowed for extraction.")
self.parser = SentenceSplitter()

def __call__(self, elements: List[str]) -> List[str]:
Expand Down Expand Up @@ -187,8 +190,8 @@ def __call__(self, elements: List[str]) -> List[str]:
if element.text:
metadata = {self.key: extract_text_after(element.text, self.string)}
element.metadata.update(ElementMetadata.from_dict(metadata))
except Exception:
pass
except Exception as exc:
logger.debug("ExtractTextAfter skipped an element: %s", exc)
return elements


Expand Down Expand Up @@ -221,6 +224,6 @@ def __call__(self, elements: List[str]) -> List[str]:
if element.text:
metadata = {self.key: extract_text_before(element.text, self.string)}
element.metadata.update(ElementMetadata.from_dict(metadata))
except Exception:
pass
except Exception as exc:
logger.debug("ExtractTextBefore skipped an element: %s", exc)
return elements
4 changes: 2 additions & 2 deletions clarifai_datautils/multimodal/pipeline/summarizer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import base64
import random
import secrets
from typing import List

try:
Expand Down Expand Up @@ -58,7 +58,7 @@ def __call__(self, elements: List) -> List:
if isinstance(element, Image):
element.metadata.update(
ElementMetadata.from_dict({
'input_id': f'{random.randint(1000000, 99999999)}'
'input_id': str(secrets.randbelow(89000000) + 1000000)
}))
img_elements.append(element)
new_elements = self._summarize_image(img_elements)
Expand Down
Loading