in packages/blueprints/gen-ai-chatbot/static-assets/chatbot-genai-components/backend/python/embedding/loaders/unstructured.py [0:0]
def load(self) -> List[Document]:
"""Load file."""
docs: List[Document] = list()
if self.show_progress_bar:
try:
from tqdm import tqdm
except ImportError as e:
raise ImportError(
"Package tqdm must be installed if show_progress_bar=True. "
"Please install with 'pip install tqdm' or set "
"show_progress_bar=False."
) from e
urls = tqdm(self.urls)
else:
urls = self.urls
for url in urls:
try:
elements = partition(
url=url, headers=self.headers, **self.unstructured_kwargs
)
except Exception as e:
if self.continue_on_failure:
logger.error(f"Error fetching or processing {url}, exception: {e}")
continue
else:
raise e
if self.mode == "single":
text = "\n\n".join([str(el) for el in elements])
metadata = {"source": url}
docs.append(Document(page_content=text, metadata=metadata))
elif self.mode == "elements":
for element in elements:
metadata = element.metadata.to_dict()
metadata["category"] = element.category
docs.append(Document(page_content=str(element), metadata=metadata))
return docs