arctic_inference/vllm/spec_dec/arctic_speculator.py [166:173]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    bias=False,
                    quant_config=quant_config,
                    skip_quantization=False,
                )
                # Set the head's quant_method explicitly to an
                # OriginalFp8LinearMethod built from the same quant_config
                # that was passed to the constructor call above.
                qhead.quant_method = OriginalFp8LinearMethod(
                    quant_config=quant_config)
                # `[qhead] * n` repeats one object reference n times, so every
                # slot of the ModuleList is the *same* qhead instance (one set
                # of parameters), not n independent copies.
                # NOTE(review): presumably intentional weight sharing across
                # speculative positions -- confirm against the enclosing
                # branch condition, which is not visible here.
                self.qhead = nn.ModuleList([qhead] *
                                           self.max_speculative_tokens)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


arctic_inference/vllm/spec_dec/arctic_speculator.py [458:465]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    bias=False,
                    quant_config=quant_config,
                    skip_quantization=False,
                )
                # Set the head's quant_method explicitly to an
                # OriginalFp8LinearMethod built from the same quant_config
                # that was passed to the constructor call above.
                qhead.quant_method = OriginalFp8LinearMethod(
                    quant_config=quant_config)
                # `[qhead] * n` repeats one object reference n times, so every
                # slot of the ModuleList is the *same* qhead instance (one set
                # of parameters), not n independent copies.
                # NOTE(review): presumably intentional weight sharing across
                # speculative positions -- confirm against the enclosing
                # branch condition, which is not visible here.
                self.qhead = nn.ModuleList([qhead] *
                                           self.max_speculative_tokens)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -