maga_transformer/cpp/devices/DeviceOps.cc

#include "maga_transformer/cpp/devices/DeviceOps.h"

#include <cstring>
#include <string>
#include <vector>

#include "OpData.h"

// Out-of-line definitions for DeviceOps.
//
// Unless a comment on the individual method says otherwise, every method in
// this file has the same body: it throws OpException(ERROR_UNIMPLEMENTED) and
// never returns a value. The methods that do something else are
// prepareAllReduce(), bufMemset(), noBlockCopy(), perfRangePush() and
// perfRangePop().
//
// NOTE(review): the class declaration lives in DeviceOps.h and is not visible
// from this file; presumably these members are virtual and concrete devices
// override them -- confirm against the header.

namespace rtp_llm {

DeviceOps::DeviceOps() {}

DeviceOps::~DeviceOps() {}

// ---------------------------------------------------------------------------
// Methods below throw OpException(ERROR_UNIMPLEMENTED) unless noted.
// ---------------------------------------------------------------------------

void DeviceOps::copy(const CopyParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

CloneOutput DeviceOps::clone(const CloneParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

TransposeOutput DeviceOps::transpose(const TransposeParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

ConvertOutput DeviceOps::convert(const ConvertParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

SelectOutput DeviceOps::select(const SelectParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

ConcatOutput DeviceOps::concat(const ConcatParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

SplitOutput DeviceOps::split(const SplitParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

LayernormOutput DeviceOps::layernorm(const LayernormParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

LayernormOutput DeviceOps::layernormWithStride(const LayernormWithStrideParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

SliceOutput DeviceOps::slice(const SliceParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AddBiasOutput DeviceOps::addbias(const AddBiasParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::loraLinearWithActivation(const LoraLinearWithActivationParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::gemm(const GemmParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

GroupedGemmOutput DeviceOps::groupedGemm(const GroupedGemmParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

MultiplyOutput DeviceOps::multiply(const MultiplyParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::embeddingLookup(const EmbeddingLookupParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::multimodalEmbedding(const MultimodalEmbeddingParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::activation(const ActivationParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::softmax(const SoftmaxParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

LossOutput DeviceOps::loss(const LossParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

MaskOutput DeviceOps::attentionMask(const MaskParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::mhaQKVGemm(const AttentionLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::mlaQKVGemm(const AttentionLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AttentionModuleOutput DeviceOps::contextAttention(const AttentionModuleParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AttentionModuleOutput DeviceOps::decoderSelfAttention(const AttentionModuleParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AttentionLayerOutput DeviceOps::mlaAttentionLayer(const AttentionLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AttentionModuleOutput DeviceOps::mlaContextAttention(const MlaAttentionModuleParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AttentionModuleOutput DeviceOps::mlaAbsorbAttention(const MlaAttentionModuleParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

void DeviceOps::mlaRotaryWriteKVCache(const MlaRotaryWriteKVCacheParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AttentionLayerOutput DeviceOps::attentionLayer(const AttentionLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::ffnLayer(const FfnLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::microBatchedFfnLayer(const FfnLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::moeFfnLayer(const FfnLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::moeSharedExpert(const FfnLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

MoeGateSelectOutput DeviceOps::moeGateSelect(const FfnLayerParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::moeFfn(const FfnLayerParams& params, const MoeGateSelectOutput& gate_outputs) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::moeFfnFp8(const FfnLayerParams& params, const MoeGateSelectOutput& gate_outputs) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::epMoeFfnLayer(const FfnLayerParams& params, const MoeGateSelectOutput& gate_output) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::deepEpMoeFfnLayer(const FfnLayerParams& params, const MoeGateSelectOutput& gate_outputs) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::deepEpLLMoeFfn(const FfnLayerParams& params, const MoeGateSelectOutput& gate_outputs) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

LoraLinearOutput DeviceOps::loraLinear(const LoraLinearParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

LoraLinearOutput DeviceOps::loraLinearWithAllReduce(const LoraLinearParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

MoeDispatchOutput DeviceOps::epDispatch(const MoeDispatchParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

MoeCombineOutput DeviceOps::epCombine(const MoeCombineParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

FfnLayerOutput DeviceOps::gatherCombineOutput(const MoeCombineOutput& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AllGatherLoraLinearOutput DeviceOps::allGatherloraLinear(const AllGatherLoraLinearParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

ReduceScatterLoraLinearOutput DeviceOps::loraLinearReduceScatter(const LoraLinearReduceScatterParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

GreedyOutput DeviceOps::sampleGreedy(const GreedyParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

void DeviceOps::sampleBeamSearch(const BeamSearchParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

void DeviceOps::broadcast(const BroadcastParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AllReduceOutput DeviceOps::allReduce(const AllReduceParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

void DeviceOps::allGather(const AllGatherParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

AllToAllOutput DeviceOps::allToAll(const AllToAllParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

void DeviceOps::reduceScatter(const ReduceScatterParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

BufferPtr DeviceOps::quantize(const QuantizeParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

// Does not throw: returns a PrepareAllReduceOutput built from params.buffer,
// i.e. the caller's buffer is handed back as-is.
PrepareAllReduceOutput DeviceOps::prepareAllReduce(const PrepareAllReduceParams& params) {
    return PrepareAllReduceOutput{params.buffer};
}

// Fills all buf.sizeBytes() bytes of `buf` with `val` via std::memset when the
// buffer reports MEMORY_CPU or MEMORY_CPU_PINNED. Any other memory type throws
// OpException(ERROR_UNIMPLEMENTED).
// `stream` is accepted but not used by this implementation.
void DeviceOps::bufMemset(Buffer& buf, int val, DeviceStream stream) {
    if (buf.where() == MemoryType::MEMORY_CPU || buf.where() == MemoryType::MEMORY_CPU_PINNED) {
        std::memset(buf.data(), val, buf.sizeBytes());
    } else {
        throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
    }
}

// Simply forwards to copy(params); there is no separate code path here.
void DeviceOps::noBlockCopy(const CopyParams& params) {
    copy(params);
}

torch::Tensor DeviceOps::preprocessGemmWeightByKey(const std::string& key, torch::Tensor weight) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

torch::Tensor DeviceOps::packInt8TensorToPackedInt4(torch::Tensor weight) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

torch::Tensor DeviceOps::preprocessWeightsForMixedGemm(torch::Tensor      weight,
                                                       torch::ScalarType  quant_type,
                                                       const std::string& arch) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

std::vector<torch::Tensor> DeviceOps::symmetricQuantizeLastAxisOfBatchedMatrix(torch::Tensor      weight,
                                                                               torch::ScalarType  quant_type,
                                                                               const std::string& arch) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

torch::Tensor DeviceOps::preprocessWeightScale(torch::Tensor weight, torch::Tensor scale) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

// Intentionally empty: does nothing and does not throw.
void DeviceOps::perfRangePush(const std::string& name) const {}

// Intentionally empty: does nothing and does not throw.
void DeviceOps::perfRangePop() const {}

void DeviceOps::prepareCommBuffer(const PrepareCommBufferParams& params) {
    throw OpException(OpErrorType::ERROR_UNIMPLEMENTED);
}

}  // namespace rtp_llm

maga_transformer/cpp/devices/DeviceOps.cc (204 lines of code) (raw):