def __init__()

in optimum/neuron/models/inference/backend/config.py [0:0]


    def __init__(
        self,
        checkpoint_id: str = None,
        checkpoint_revision: str = None,
        batch_size: Optional[int] = 1,
        max_batch_size: Optional[int] = None,
        continuous_batching: Optional[bool] = False,
        speculation_length: Optional[int] = 0,
        sequence_length: Optional[int] = 128,
        tp_degree: Optional[int] = 1,
        ep_degree: Optional[int] = 1,
        pp_degree: Optional[int] = 1,
        torch_dtype: Optional[Union[str, torch.dtype]] = torch.bfloat16,
        rpl_reduce_dtype: Optional[Union[str, torch.dtype]] = None,
        n_active_tokens: Optional[int] = None,
        max_context_length: Optional[int] = None,
        output_logits: Optional[bool] = False,
        padding_side: Optional[str] = "right",
        fused_qkv: Optional[bool] = False,
        vocab_parallel: Optional[bool] = False,
        sequence_parallel_enabled: Optional[bool] = False,
        is_chunked_prefill: Optional[bool] = False,
        flash_decoding_enabled: Optional[bool] = False,
        async_mode: Optional[bool] = False,
        qk_layernorm: Optional[bool] = False,
        attn_kernel_enabled: Optional[bool] = False,
        qkv_kernel_enabled: Optional[bool] = False,
        mlp_kernel_enabled: Optional[bool] = False,
        mlp_kernel_fuse_residual_add: Optional[bool] = False,
        enable_bucketing: Optional[bool] = False,
        target: Optional[str] = None,  # Set to "trn2" for trn2
        logical_nc_config: Optional[int] = 1,
        cc_pipeline_tiling_factor: Optional[int] = 2,
        num_cores_per_group: Optional[int] = 1,
        on_device_sampling: Optional[bool] = False,
        max_topk: Optional[int] = 256,
        start_rank_id: Optional[int] = 0,
        local_ranks_size: Optional[int] = None,
        capacity_factor: float = None,
        glu_mlp: bool = True,