def configure_chunking()

in yourbench/main.py [0:0]


def configure_chunking(enabled: bool) -> dict:
    """Configure chunking stage."""
    config = {"run": enabled}

    if not enabled:
        return config

    if Confirm.ask("\nConfigure chunking parameters?", default=False):
        chunk_config = {}
        chunk_config["l_max_tokens"] = IntPrompt.ask("Max tokens per chunk", default=DEFAULT_CHUNK_TOKENS)
        chunk_config["token_overlap"] = IntPrompt.ask("Token overlap", default=0)
        chunk_config["encoding_name"] = Prompt.ask("Tokenizer encoding", default="cl100k_base")

        # Multi-hop configuration
        if Confirm.ask("Configure multi-hop parameters?", default=True):
            chunk_config["h_min"] = IntPrompt.ask("Min chunks for multi-hop", default=DEFAULT_H_MIN)
            chunk_config["h_max"] = IntPrompt.ask("Max chunks for multi-hop", default=DEFAULT_H_MAX)
            chunk_config["num_multihops_factor"] = IntPrompt.ask("Multi-hop factor", default=DEFAULT_MULTIHOP_FACTOR)

        config["chunking_configuration"] = chunk_config
    else:
        config["chunking_configuration"] = {
            "l_max_tokens": DEFAULT_CHUNK_TOKENS,
            "token_overlap": 0,
            "encoding_name": "cl100k_base",
            "h_min": DEFAULT_H_MIN,
            "h_max": DEFAULT_H_MAX,
            "num_multihops_factor": DEFAULT_MULTIHOP_FACTOR,
        }

    return config