in chunking/chunkers/doc_analysis_chunker.py [0:0]
def __init__(self, data, max_chunk_size=None, minimum_chunk_size=None, token_overlap=None):
    """Set up chunk-size limits and the Document Intelligence client.

    Each size argument left as ``None`` is read from an environment
    variable, falling back to a built-in default when the variable is unset.

    Args:
        data: Passed unchanged to the parent class initializer.
        max_chunk_size: Upper chunk size limit. When not given (or falsy),
            uses ``NUM_TOKENS`` from the environment, default 2048.
        minimum_chunk_size: Lower chunk size limit. When ``None``, uses
            ``MIN_CHUNK_SIZE`` from the environment, default 100. An
            explicit ``0`` is honored.
        token_overlap: Overlap setting. When ``None``, uses
            ``TOKEN_OVERLAP`` from the environment, default 100. An
            explicit ``0`` is honored.

    Raises:
        ValueError: If a consulted environment variable is set to a value
            that ``int()`` cannot parse.
    """
    super().__init__(data)

    # Falsy fallback kept on purpose: this preserves the existing behavior
    # where max_chunk_size=0 resolves to the environment default.
    self.max_chunk_size = max_chunk_size or int(os.getenv("NUM_TOKENS", "2048"))

    # Compare against None rather than relying on truthiness so that an
    # explicit 0 is not silently replaced by the environment default.
    if minimum_chunk_size is None:
        minimum_chunk_size = int(os.getenv("MIN_CHUNK_SIZE", "100"))
    self.minimum_chunk_size = minimum_chunk_size

    if token_overlap is None:
        token_overlap = int(os.getenv("TOKEN_OVERLAP", "100"))
    self.token_overlap = token_overlap

    self.docint_client = DocumentIntelligenceClient()
    # The supported formats are whatever extensions the client reports.
    self.supported_formats = self.docint_client.file_extensions