in optimum_benchmark/backends/vllm/config.py [0:0]
def __post_init__(self):
    """Validate engine_args and seed the kwargs needed for task/library/model_type inference.

    Raises:
        ValueError: if `engine_args` contains a key that duplicates a field owned by the
            backend config (`model`, `tokenizer`, `device`), or if `serving_mode` is not
            one of `"offline"` / `"online"`.
    """
    # These engine_args keys duplicate fields handled by the backend config directly;
    # reject them in the same order the original checks ran.
    duplicate_key_errors = [
        ("model", "model should not be passed in `backend.engine_args`, use `backend.model` instead"),
        ("tokenizer", "tokenizer should not be passed in `backend.engine_args`, use `backend.processor` instead"),
        ("device", "device should not be passed in `backend.engine_args`, use `backend.device` instead"),
    ]
    for key, message in duplicate_key_errors:
        if key in self.engine_args:
            raise ValueError(message)

    if self.serving_mode not in ("offline", "online"):
        raise ValueError(f"Invalid serving_mode: {self.serving_mode}. Must be 'online' or 'offline'.")

    # Defaults needed for task/library/model_type inference; user-provided kwargs
    # (unpacked last) take precedence over values derived from engine_args.
    trust_remote_code = self.engine_args.get("trust_remote_code", False)
    self.model_kwargs = {
        "revision": self.engine_args.get("revision", "main"),
        "trust_remote_code": trust_remote_code,
        **self.model_kwargs,
    }
    self.processor_kwargs = {
        "revision": self.engine_args.get("tokenizer_revision", "main"),
        "trust_remote_code": trust_remote_code,
        **self.processor_kwargs,
    }

    super().__post_init__()

    # Quiet vLLM's periodic stats logging unless the user set a value explicitly
    # (an explicit None is treated the same as "unset", matching the `is None` check).
    if self.engine_args.get("disable_log_stats", None) is None:
        self.engine_args["disable_log_stats"] = True
    # In online serving, also silence per-request logging by default.
    if self.serving_mode == "online" and self.engine_args.get("disable_log_requests", None) is None:
        self.engine_args["disable_log_requests"] = True