def init()

in chatlearn/models/vllm/hooks/vllm_0_6_6/llm.py [0:0]
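
This hook re-implements the vLLM 0.6.6 `LLM` constructor so that ChatLearn can install it in place of the stock `__init__` (the patching mechanism itself is not shown in this excerpt). For the snippet to run on its own it needs roughly the imports below; the module paths are assumptions based on the vLLM 0.6.6 layout, not copied from the hook file:

# Assumed imports for this excerpt (module paths per the vLLM 0.6.6 layout).
from typing import Any, Dict, Optional, Union

from vllm.config import CompilationConfig, HfOverrides, PoolerConfig, TaskOption
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Counter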


def init(self,
        model: str,
        tokenizer: Optional[str] = None,
        tokenizer_mode: str = "auto",
        skip_tokenizer_init: bool = False,
        trust_remote_code: bool = False,
        allowed_local_media_path: str = "",
        tensor_parallel_size: int = 1,
        dtype: str = "auto",
        quantization: Optional[str] = None,
        revision: Optional[str] = None,
        tokenizer_revision: Optional[str] = None,
        seed: int = 0,
        gpu_memory_utilization: float = 0.9,
        swap_space: float = 4,
        cpu_offload_gb: float = 0,
        enforce_eager: Optional[bool] = None,
        max_seq_len_to_capture: int = 8192,
        disable_custom_all_reduce: bool = False,
        disable_async_output_proc: bool = False,
        hf_overrides: Optional[HfOverrides] = None,
        mm_processor_kwargs: Optional[Dict[str, Any]] = None,
        # After positional args are removed, move this right below `model`
        task: TaskOption = "auto",
        override_pooler_config: Optional[PoolerConfig] = None,
        compilation_config: Optional[Union[int, Dict[str, Any]]] = None,
        **kwargs,) -> None:
    '''
    LLM constructor.

    Note: if enforce_eager is unset (i.e. enforce_eager is None),
    it defaults to False.
    '''

    if "disable_log_stats" not in kwargs:
        kwargs["disable_log_stats"] = True

    # Normalize compilation_config: int levels and dict overrides are parsed via
    # CompilationConfig.from_cli; an existing CompilationConfig instance passes
    # through unchanged.
    if compilation_config is not None:
        if isinstance(compilation_config, (int, dict)):
            compilation_config_instance = CompilationConfig.from_cli(
                str(compilation_config))
        else:
            compilation_config_instance = compilation_config
    else:
        compilation_config_instance = None

    # Collect every constructor argument (plus any extra kwargs) into
    # AsyncEngineArgs, which drives how the engine is built below.
    engine_args = AsyncEngineArgs(
        model=model,
        task=task,
        tokenizer=tokenizer,
        tokenizer_mode=tokenizer_mode,
        skip_tokenizer_init=skip_tokenizer_init,
        trust_remote_code=trust_remote_code,
        allowed_local_media_path=allowed_local_media_path,
        tensor_parallel_size=tensor_parallel_size,
        dtype=dtype,
        quantization=quantization,
        revision=revision,
        tokenizer_revision=tokenizer_revision,
        seed=seed,
        gpu_memory_utilization=gpu_memory_utilization,
        swap_space=swap_space,
        cpu_offload_gb=cpu_offload_gb,
        enforce_eager=enforce_eager,
        max_seq_len_to_capture=max_seq_len_to_capture,
        disable_custom_all_reduce=disable_custom_all_reduce,
        disable_async_output_proc=disable_async_output_proc,
        hf_overrides=hf_overrides,
        mm_processor_kwargs=mm_processor_kwargs,
        override_pooler_config=override_pooler_config,
        compilation_config=compilation_config_instance,
        **kwargs,
    )
    # Logic to switch between engines is done at runtime instead of import
    # to avoid import order issues
    self.engine_class = self.get_engine_class()

    # TODO(rob): enable mp by default (issue with fork vs spawn)
    self.llm_engine = self.engine_class.from_engine_args(
        engine_args, usage_context=UsageContext.LLM_CLASS)

    # Counter used to assign unique ids to incoming requests.
    self.request_counter = Counter()
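
A minimal sketch of how a hook like this is typically wired in, assuming it is monkey-patched over `vllm.LLM.__init__`; the patch site and the example model name are assumptions, not taken from this file:

from vllm import LLM

# Assumption: ChatLearn installs the hook by replacing the stock constructor.
LLM.__init__ = init

# After patching, constructing an LLM routes through the hook above, so engine
# arguments are collected as AsyncEngineArgs and disable_log_stats defaults to True.
llm = LLM(model="facebook/opt-125m", compilation_config=3)

The two ChatLearn-specific touches visible in the hook are the `AsyncEngineArgs` construction and the `disable_log_stats` default; beyond that it forwards the constructor arguments to the engine unchanged.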