fairseq/model_parallel/modules/multihead_attention.py fairseq/modules/multihead_attention.py fairseq/model_parallel/models/pipeline_parallel_transformer/layers.py fairseq/modules/transformer_layer.py fairseq/model_parallel/modules/multihead_attention.py fairseq/modules/sparse_multihead_attention.py fairseq/model_parallel/models/pipeline_parallel_transformer/layers.py fairseq/modules/dynamic_convolution.py