in recipes/joint_training_vox_populi/cpc/TransformerCPC.cpp [145:172]
std::vector<Variable> TransformerCPC::forward(
const std::vector<Variable>& input) {
// previous step[optionally], input, padMask
// padMask should be empty if previous step is provided
// padMask is expected to have "1" on the used positions and "0" on padded
// positions
if (input.size() < 2) {
throw std::invalid_argument(
"Invalid inputs for transformer block: there should be at least input and mask");
}
auto x = input.at(input.size() - 2);
if (!input.back().isempty() && x.dims(2) != input.back().dims(1)) {
throw std::invalid_argument(
"Invalid inputs for transformer block: input and Mask batch sizes are different");
}
float f = 1.0;
if (train_ && (af::randu(1).scalar<float>() < pLayerdrop_)) {
f = 0.0;
}
if (preLN_) {
auto h = (f * (*norm1_)(selfAttention(input))).as(x.type()) + x;
return {f * (*norm2_)(mlp(h)).as(h.type()) + h};
} else {
auto h = (*norm1_)((f * selfAttention(input)).as(x.type()) + x);
return {(*norm2_)((f * mlp(h)).as(h.type()) + h)};
}
}