maga_transformer/cpp/normal_engine/NormalEngine.h (58 lines of code) (raw):

#pragma once #include <atomic> #include <chrono> #include <iostream> #include <memory> #include <thread> #include "absl/status/status.h" #include "kmonitor/client/MetricsReporter.h" #include "maga_transformer/cpp/engine_base/EngineBase.h" #include "maga_transformer/cpp/cache/CacheManager.h" #include "maga_transformer/cpp/dataclass/EngineInitParameter.h" #include "maga_transformer/cpp/engine_base/Executor.h" #include "maga_transformer/cpp/models/GptModel.h" #include "maga_transformer/cpp/schedulers/SchedulerBase.h" #include "maga_transformer/cpp/system_prompt/SystemPrompt.h" #include "maga_transformer/cpp/metrics/RtpLLMMetrics.h" #include "maga_transformer/cpp/dataclass/LoadBalance.h" namespace rtp_llm { class NormalEngine: public EngineBase { public: NormalEngine(const EngineInitParams& params); ~NormalEngine(); std::shared_ptr<GenerateStream> makeStream(const std::shared_ptr<GenerateInput>& input) override; std::shared_ptr<GenerateStream> enqueue(const std::shared_ptr<GenerateInput>& input) override; void enqueue(std::shared_ptr<GenerateStream>& stream) override; absl::StatusOr<GenerateStreamPtr> preRun(const std::shared_ptr<GenerateInput>& generate_input, preRunMode mode) override; absl::Status stop() override; LoadBalanceInfo getLoadBalanceInfo() override; absl::Status step(); absl::Status startLoop(); int64_t getLastScheduleTime() override; const rtp_llm::GptInitParameter gptInitParameter() const; void reportMetrics(RtpLLMEngineMetricsCollector collector) { if (metrics_reporter_) { metrics_reporter_->report<RtpLLMEngineMetrics, RtpLLMEngineMetricsCollector>(nullptr, &collector); } } private: void initScheduler(); std::shared_ptr<GenerateStream> enqueueMinFakeQuery(int32_t max_new_tokens); WarmUpResult warmUp(const EngineInitParams& params); WarmUpResult prefillWarmUp(const EngineInitParams& params); WarmUpResult decodeWarmUp(const EngineInitParams& params); void initLoadBalance(); absl::Status trySaveStepError() const; void loop(); void initCacheManager(std::optional<WarmUpResult> warm_up_result); absl::Status initSystemPrompt(); std::shared_ptr<GenerateInput> makeFakeInput(size_t seq_len); private: autil::ThreadPtr loop_thread_; std::atomic<bool> running_{false}; std::unique_ptr<Executor> executor_; const rtp_llm::GptInitParameter params_; StepRecorder step_recorder_; kmonitor::MetricsReporterPtr metrics_reporter_; }; } // namespace rtp_llm