def build_scheduler()

in chatlearn/models/vllm_module.py

Builds the scheduler state for whichever vLLM version ChatLearn is running against. On the first call it constructs the Scheduler object(s) and, for vLLM 0.5.1/0.6.3, the sequence-group output processor; on subsequent calls it only re-creates each scheduler's block manager from the current cache config.

    def build_scheduler(self):
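        # Reset the counter that assigns ids to new sequences (a vllm.utils.Counter).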
        self.seq_counter = Counter()
        if CURRENT_VLLM_VERSION == VLLMVersion.v_0_3_0:
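            # vLLM 0.3.0 keeps a single scheduler (the trailing None is the lora_config);
            # on rebuilds only its block manager is replaced.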
            if self.scheduler is None:
                self.scheduler = Scheduler(self.scheduler_config, self.cache_config, None)
            else:
                BlockSpaceManagerImpl = get_block_manager_cls(None)
                self.scheduler.block_manager = BlockSpaceManagerImpl( # pylint: disable=abstract-class-instantiated
                    block_size=self.cache_config.block_size,
                    num_gpu_blocks=self.cache_config.num_gpu_blocks,
                    num_cpu_blocks=self.cache_config.num_cpu_blocks,
                    sliding_window=self.cache_config.sliding_window)
        elif CURRENT_VLLM_VERSION in [VLLMVersion.v_0_5_1, VLLMVersion.v_0_6_3]:
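            # vLLM 0.5.1/0.6.3 run one scheduler per pipeline-parallel stage (virtual engine).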
            if self.scheduler is None:
                self.scheduler = [
                    Scheduler(self.scheduler_config, self.cache_config, None,
                              self.parallel_config.pipeline_parallel_size)
                    for _ in range(self.parallel_config.pipeline_parallel_size)
                ]

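                # vLLM 0.6.3 dropped the engine-level get_tokenizer_for_seq helper, so an
                # equivalent closure over the tokenizer group is defined here.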
                def get_tokenizer_for_seq(sequence):
                    tokenizer_group = self.get_tokenizer_group()
                    assert tokenizer_group, ("tokenizer_group cannot be None, "
                                            "make sure skip_tokenizer_init is False")
                    return tokenizer_group.get_lora_tokenizer(sequence.lora_request)

                tokenizer_for_seq = get_tokenizer_for_seq if CURRENT_VLLM_VERSION == VLLMVersion.v_0_6_3 \
                    else self.get_tokenizer_for_seq

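                # The output processor detokenizes generated tokens and applies stop
                # criteria to running sequence groups.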
                self.output_processor = (
                    SequenceGroupOutputProcessor.create_output_processor(
                        self.scheduler_config,
                        self.detokenizer,
                        self.scheduler,
                        self.seq_counter,
                        tokenizer_for_seq,
                        stop_checker=StopChecker(
                            self.scheduler_config.max_model_len,
                            tokenizer_for_seq,
                        ),
                    ))
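                # Extra per-virtual-engine state that the vLLM 0.6.3 engine loop expects.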
                if CURRENT_VLLM_VERSION == VLLMVersion.v_0_6_3:
                    self.input_preprocessor = InputPreprocessor(self.model_config,
                                                                self.tokenizer)
                    self.cached_scheduler_outputs = [
                        SchedulerOutputState()
                        for _ in range(self.parallel_config.pipeline_parallel_size)
                    ]
                    self.scheduler_contexts = [
                        SchedulerContext(multi_step_stream_outputs=self.scheduler_config.multi_step_stream_outputs)
                        for _ in range(self.parallel_config.pipeline_parallel_size)
                    ]
                    self.use_cached_outputs = False
                    self.process_request_outputs_callback = None
                    self.tracer = None
            else:
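                # Schedulers already exist: pick the block-manager implementation name this
                # vLLM version understands, then rebuild each scheduler's block manager.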
                if CURRENT_VLLM_VERSION == VLLMVersion.v_0_6_3:
                    version = "selfattn"
                    if (self.scheduler_config.embedding_mode
                            or self.cache_config.is_attention_free):
                        version = "placeholder"
                else:
                    version = "v1"
                    if self.scheduler_config.use_v2_block_manager:
                        version = "v2"
                    if self.scheduler_config.embedding_mode:
                        version = "embedding"

                BlockSpaceManagerImpl = get_block_manager_cls(version)
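                # Each pipeline stage schedules independently, so split the profiled
                # KV-cache block counts evenly across stages.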
                num_gpu_blocks = self.cache_config.num_gpu_blocks
                if num_gpu_blocks:
                    num_gpu_blocks //= self.pipeline_model_parallel_size()
                num_cpu_blocks = self.cache_config.num_cpu_blocks
                if num_cpu_blocks:
                    num_cpu_blocks //= self.pipeline_model_parallel_size()

                for scheduler in self.scheduler:
                    scheduler.block_manager = BlockSpaceManagerImpl( # pylint: disable=abstract-class-instantiated
                        block_size=self.cache_config.block_size,
                        num_gpu_blocks=num_gpu_blocks,
                        num_cpu_blocks=num_cpu_blocks,
                        sliding_window=self.cache_config.sliding_window,
                        enable_caching=self.cache_config.enable_prefix_caching)
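
A minimal usage sketch, assuming a VLLMModule-like object whose scheduler_config, cache_config, and parallel_config are already populated (the object name and the re-profiling step are illustrative, not taken from the source):

    # Hypothetical driver code: "module" stands for an initialized vllm_module instance.
    module.scheduler = None
    module.build_scheduler()   # first call: constructs the Scheduler object(s)

    # ... KV-cache profiling later updates cache_config.num_gpu_blocks/num_cpu_blocks ...
    module.build_scheduler()   # second call: swaps in freshly sized block managers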