recipes/mling_pl/mling_large.cpp

/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Adapted from Tatiana's ctc_letters_st3_ls100h_slimIPL_dp03_dyndp
 * architecture
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <iostream>
#include <memory>
#include <sstream>

#include "flashlight/fl/contrib/modules/modules.h"
#include "flashlight/fl/flashlight.h"
#include "flashlight/fl/nn/modules/modules.h"

namespace slimIPL {

class myModel : public fl::Container {
 public:
  myModel(int64_t nFeature, int64_t nLabel) {
    // Convolutional frontend: View -> LayerNorm -> Conv2D -> GLU -> Dropout -> Reorder.
    convFrontend_->add(
        std::make_shared<fl::View>(af::dim4(-1, 1, nFeature, 0)));
    // Time x 1 x nFeature x Batch
    std::vector<int> lnDims = {0, 1, 2};
    convFrontend_->add(std::make_shared<fl::LayerNorm>(lnDims));
    convFrontend_->add(
        // std::make_shared<fl::Conv2D>(nFeature, 1536, 7, 1, 3, 1, -1, 0, 1, 1));
        std::make_shared<fl::Conv2D>(nFeature, 3072, 7, 1, 3, 1, -1, 0, 1, 1));
    convFrontend_->add(std::make_shared<fl::GatedLinearUnit>(2));
    convFrontend_->add(std::make_shared<fl::Dropout>(0.3));
    convFrontend_->add(std::make_shared<fl::Reorder>(2, 0, 3, 1));
    // nFeature x Time x Batch x 1
    add(convFrontend_);

    // 36 Transformer blocks: model dim 1536, 4 heads of dim 384, FFN dim 6144.
    for (int trIdx = 0; trIdx < 36; trIdx++) {
      auto layer = std::make_shared<fl::Transformer>(
          // 768, 192, 3072, 4, 920, 0.3, 0.3, false, false);
          1536, 384, 6144, 4, 920, 0.3, 0.3, false, false);
      transformers_.push_back(layer);
      add(layer);
    }

    // CTC head over the output vocabulary.
    // linear_ = std::make_shared<fl::Linear>(768, nLabel);
    linear_ = std::make_shared<fl::Linear>(1536, nLabel);
    add(linear_);

    // Language-ID head over 60 languages.
    int nLanguages = 60;
    // LID_head_ = std::make_shared<fl::Linear>(768, nLanguages);
    LID_head_ = std::make_shared<fl::Linear>(1536, nLanguages);
    add(LID_head_);
  }

  std::vector<fl::Variable> forward(
      const std::vector<fl::Variable>& input) override {
    auto out = input[0];
    auto xSizes = input[1].array();
    float dp = -1;
    if (input.size() > 2) {
      dp = af::sum<float>(input[2].array());
    }
    // expected input dims T x C x 1 x B
    out = convFrontend_->forward(out);
    int T = out.dims(0), B = out.dims(3);

    // Build a padding mask from the per-sample input sizes.
    auto inputMaxSize = af::tile(af::max(xSizes), 1, B);
    af::array inputNotPaddedSize = af::ceil(xSizes * T / inputMaxSize);
    auto padMask = af::iota(af::dim4(T, 1), af::dim4(1, B)) <
        af::tile(inputNotPaddedSize, T, 1);

    for (size_t trIdx = 0; trIdx < transformers_.size(); trIdx++) {
      // NOTE: not required for inference
      // if (dp >= 0) {
      //   transformers_[trIdx]->setDropout(dp);
      //   transformers_[trIdx]->setLayerDropout(dp);
      // }
      out = transformers_[trIdx]->forward({out, fl::noGrad(padMask)}).front();
    }

    auto ctc_head_out = linear_->forward(out);
    auto LID_head_out = LID_head_->forward(out);
    // Average LID logits over time, then log-softmax over languages.
    LID_head_out = fl::mean(LID_head_out.as(f32), std::vector<int>{1}).as(f32);
    LID_head_out = fl::logSoftmax(LID_head_out, 0);
    return {
        ctc_head_out.as(input[0].type()),
        LID_head_out}; //.as(input[0].type())};
  }

  std::string prettyString() const override {
    std::ostringstream ss;
    ss << "Model myModel: ";
    ss << convFrontend_->prettyString() << "\n";
    ss << "(reshaping happens here)\n";
    for (size_t trIdx = 0; trIdx < transformers_.size(); trIdx++) {
      ss << transformers_[trIdx]->prettyString() << "\n";
    }
    ss << "(inverse reshaping happens here)\n";
    ss << "CTC head: " << linear_->prettyString() << "\n";
    ss << "Language ID head: " << LID_head_->prettyString() << "\n";
    return ss.str();
  }

 private:
  myModel() = default;

  std::shared_ptr<fl::Sequential> convFrontend_{
      std::make_shared<fl::Sequential>()};
  std::vector<std::shared_ptr<fl::Transformer>> transformers_;
  std::shared_ptr<fl::Linear> linear_;
  std::shared_ptr<fl::Linear> LID_head_;

  FL_SAVE_LOAD_WITH_BASE(
      fl::Container, convFrontend_, transformers_, linear_, LID_head_)
};

} // namespace slimIPL

// Plugin entry point: the compiled shared object exposes this symbol so the
// training/inference binary can construct the architecture at runtime.
extern "C" fl::Module* createModule(int64_t nFeature, int64_t nLabel) {
  auto m = std::make_unique<slimIPL::myModel>(nFeature, nLabel);
  return m.release();
}

CEREAL_REGISTER_TYPE(slimIPL::myModel)
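
// Usage sketch (not part of the original recipe; the plugin filename,
// feature/label counts, and the standalone loader below are assumptions for
// illustration only). This file is normally built into a shared object whose
// `createModule` symbol is resolved at runtime; a minimal hand-rolled loader
// on a POSIX system, linked against libdl, might look like:
//
//   #include <dlfcn.h>
//   #include <iostream>
//   #include <memory>
//   #include "flashlight/fl/flashlight.h"
//
//   int main() {
//     // "./mling_large.so" is a hypothetical build output path.
//     void* handle = dlopen("./mling_large.so", RTLD_LAZY);
//     if (!handle) { return 1; }
//     using Factory = fl::Module* (*)(int64_t, int64_t);
//     auto create = reinterpret_cast<Factory>(dlsym(handle, "createModule"));
//     if (!create) { return 1; }
//     // 80 input features / 30 output labels are placeholder values.
//     std::shared_ptr<fl::Module> model(create(80, 30));
//     std::cout << model->prettyString() << std::endl;
//     return 0;
//   }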