in chunking/chunkers/langchain_chunker.py [0:0]
def __init__(self, data):
    """
    Set up the chunker: forward ``data`` to the parent initializer, then load
    the chunking limits from environment variables.

    Environment variables read (each converted with ``int``):
        NUM_TOKENS      -> ``self.max_chunk_size``      (default "2048")
        MIN_CHUNK_SIZE  -> ``self.minimum_chunk_size``  (default "100")
        TOKEN_OVERLAP   -> ``self.token_overlap``       (default "100")

    Args:
        data: Passed unchanged to the parent class's ``__init__``. Earlier
            documentation describes it as the document content to be
            chunked (str) -- confirm against the parent class.

    Raises:
        ValueError: If one of the environment variables above is set to a
            value that ``int()`` cannot parse.
    """
    super().__init__(data)

    # (attribute name, environment variable, fallback string), resolved in
    # this order so a bad value fails at the same point as before.
    env_settings = (
        ("max_chunk_size", "NUM_TOKENS", "2048"),
        ("minimum_chunk_size", "MIN_CHUNK_SIZE", "100"),
        ("token_overlap", "TOKEN_OVERLAP", "100"),
    )
    for attribute, variable, fallback in env_settings:
        setattr(self, attribute, int(os.environ.get(variable, fallback)))

    # Key (written without a leading dot) -> format name. The keys look like
    # file extensions; how they are matched is decided by code outside this
    # method.
    extension_to_format = (
        ("md", "markdown"),
        ("txt", "text"),
        ("html", "html"),
        ("shtml", "html"),
        ("htm", "html"),
        ("py", "python"),
        ("csv", "csv"),
        ("xml", "xml"),
    )
    self.supported_formats = dict(extension_to_format)