arctic_inference/vllm/spec_dec/arctic_speculator.py [351:359]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for i in range(num_predict_tokens):
                static_next_tokens[i] = self.static_cuda_buffers[
                    "next_tokens"][i][:padded_size]

            if g is None:
                device = torch.cuda.current_device()
                with graph_capture(device=device) as capture_context:
                    g = torch.cuda.CUDAGraph()
                    with torch.cuda.graph(g, stream=capture_context.stream):
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


arctic_inference/vllm/spec_dec/arctic_speculator.py [814:822]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for i in range(num_predict_tokens):
                static_next_tokens[i] = self.static_cuda_buffers[
                    "next_tokens"][i][:padded_size]

            if g is None:
                device = torch.cuda.current_device()
                with graph_capture(device=device) as capture_context:
                    g = torch.cuda.CUDAGraph()
                    with torch.cuda.graph(g, stream=capture_context.stream):
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -