in arctic_inference/vllm/config.py [0:0]
def __post_init__(self):
    """Apply suffix-decoding defaults, then validate or delegate.

    Two modes are derived from ``self.method`` and
    ``self.enable_suffix_decoding``:

    * suffix-only: ``method == "suffix"``, or ``method is None`` while
      ``enable_suffix_decoding`` is truthy.
    * hybrid: ``method == "arctic"`` while ``enable_suffix_decoding`` is
      truthy.

    Side effects, in order:

    1. For suffix-only or any ``"arctic"`` method, an unset
       ``disable_by_batch_size`` is defaulted to 64 (and logged).
    2. In hybrid mode, ``suffix_speculative_tokens`` is set to
       ``suffix_cache_max_depth``.
    3. In suffix-only mode the config is normalised (``method``,
       ``enable_suffix_decoding``, ``num_speculative_tokens``) and
       ``_verify_args()`` is called; otherwise ``_orig_post_init()`` runs
       instead.
    """
    requested = self.method
    is_arctic = requested == "arctic"
    suffix_only = requested == "suffix" or (
        requested is None and self.enable_suffix_decoding)
    hybrid = is_arctic and self.enable_suffix_decoding

    # Only fill in the batch-size cutoff when the user left it unset.
    if (suffix_only or is_arctic) and self.disable_by_batch_size is None:
        logger.info("Defaulting disable_by_batch_size to 64")
        self.disable_by_batch_size = 64

    if hybrid:
        self.suffix_speculative_tokens = self.suffix_cache_max_depth

    if not suffix_only:
        # NOTE(review): presumably the wrapped upstream __post_init__ —
        # confirm where _orig_post_init is bound.
        self._orig_post_init()
        return

    # Suffix-only: pin the method explicitly and size speculation to the
    # suffix cache depth before validating.
    self.method = "suffix"
    self.enable_suffix_decoding = True
    self.num_speculative_tokens = self.suffix_cache_max_depth
    self._verify_args()