app/utils/helpers.py (18 lines of code) (raw):

"""(c) 2025, Elastic Co. Author: Adhish Thite <adhish.thite@elastic.co> """ import os import json import hashlib from typing import List, Dict, Any, Generator from app.config.logging_config import setup_logger logger = setup_logger(__name__) def init(output_dir: str) -> None: """Initialize the application by creating the output directory if it doesn't exist.""" if not os.path.exists(output_dir): os.makedirs(output_dir) logger.info(f"📁 Created output directory: {output_dir}") def generate_hash(data: Any) -> str: """Generate a hash for the given data.""" data_string = json.dumps(data, sort_keys=True) return hashlib.md5(data_string.encode()).hexdigest() def batch_documents( documents: List[Dict[str, Any]], batch_size: int ) -> Generator[List[Dict[str, Any]], None, None]: """Split the documents into batches.""" for i in range(0, len(documents), batch_size): yield documents[i : i + batch_size]