in yourbench/main.py [0:0]
def configure_chunking(enabled: bool) -> dict:
    """Interactively assemble the settings for the chunking stage.

    Args:
        enabled: Whether the chunking stage should run. Stored verbatim
            under the ``"run"`` key of the result.

    Returns:
        A dict that always contains ``"run"``. When ``enabled`` is truthy it
        also contains ``"chunking_configuration"``: either the default
        values (if the user declines to customise) or the values entered at
        the prompts. In the customised case the multi-hop keys (``h_min``,
        ``h_max``, ``num_multihops_factor``) are present only if the user
        also chose to configure them.
    """
    stage = {"run": enabled}

    if enabled:
        wants_custom = Confirm.ask("\nConfigure chunking parameters?", default=False)

        if not wants_custom:
            # User kept the defaults: write every parameter out explicitly.
            stage["chunking_configuration"] = {
                "l_max_tokens": DEFAULT_CHUNK_TOKENS,
                "token_overlap": 0,
                "encoding_name": "cl100k_base",
                "h_min": DEFAULT_H_MIN,
                "h_max": DEFAULT_H_MAX,
                "num_multihops_factor": DEFAULT_MULTIHOP_FACTOR,
            }
        else:
            # Dict literals evaluate entries top to bottom, so the prompts
            # appear in the order listed here.
            params = {
                "l_max_tokens": IntPrompt.ask("Max tokens per chunk", default=DEFAULT_CHUNK_TOKENS),
                "token_overlap": IntPrompt.ask("Token overlap", default=0),
                "encoding_name": Prompt.ask("Tokenizer encoding", default="cl100k_base"),
            }

            # Multi-hop settings are optional; they are added only on request.
            if Confirm.ask("Configure multi-hop parameters?", default=True):
                params.update(
                    h_min=IntPrompt.ask("Min chunks for multi-hop", default=DEFAULT_H_MIN),
                    h_max=IntPrompt.ask("Max chunks for multi-hop", default=DEFAULT_H_MAX),
                    num_multihops_factor=IntPrompt.ask("Multi-hop factor", default=DEFAULT_MULTIHOP_FACTOR),
                )

            stage["chunking_configuration"] = params

    return stage