in src/optimum/nvidia/runtime.py [0:0]
def default_executor_config(config: Dict[str, Any]) -> "ExecutorConfig":
    """Build the default ``ExecutorConfig`` from a configuration mapping.

    The KV cache token budget is sized as
    ``max_beam_width * tokens_per_block * floor(max_seq_len / tokens_per_block)``.

    Args:
        config: Mapping with a ``"build_config"`` entry providing
            ``"max_seq_len"``, ``"max_beam_width"`` and a nested
            ``"plugin_config"`` entry providing ``"tokens_per_block"``.

    Returns:
        An ``ExecutorConfig`` with KV cache block reuse enabled and
        ``enable_chunked_context`` set to the result of ``is_post_ampere()``.

    Raises:
        KeyError: If ``config`` is a plain dict and one of the entries
            listed above is missing.
    """
    build_section = config["build_config"]
    plugin_section = config["build_config"]["plugin_config"]

    sequence_length = build_section["max_seq_len"]
    block_size = plugin_section["tokens_per_block"]

    # Only whole blocks are counted per sequence.
    # NOTE(review): flooring drops a trailing partial block when max_seq_len
    # is not a multiple of tokens_per_block -- confirm this is intended.
    blocks_per_sequence = math.floor(sequence_length / block_size)

    chunked_context = is_post_ampere()

    kv_cache = KvCacheConfig(
        enable_block_reuse=True,
        max_tokens=(
            build_section["max_beam_width"] * block_size * blocks_per_sequence
        ),
    )

    return ExecutorConfig(
        enable_chunked_context=chunked_context,
        kv_cache_config=kv_cache,
    )