in aiops/Pathformer_ICLR2024/layers/Layer.py [0:0]
def __init__(self, device, d_model, d_ff, num_nodes, patch_nums, patch_size, dynamic, factorized, layer_number):
    """Build one Pathformer transformer layer.

    Assembles four sub-systems: intra-patch attention (with per-patch
    learnable embeddings and generated weights), inter-patch attention
    over flattened patches, BatchNorm-based normalization, and a
    position-wise feed-forward network.

    Note: submodules are created in a fixed order so that random
    parameter initialization consumes the RNG identically to the
    reference implementation.
    """
    super(Transformer_Layer, self).__init__()

    # Basic configuration.
    self.device = device
    self.d_model = d_model
    self.num_nodes = num_nodes
    self.dynamic = dynamic
    self.patch_nums = patch_nums
    self.patch_size = patch_size
    self.layer_number = layer_number

    # --- Intra-patch attention -------------------------------------
    # One learnable 16-dim embedding per patch, broadcast over nodes;
    # each patch gets its own small projector up to d_model.
    self.intra_embeddings = nn.Parameter(
        torch.rand(self.patch_nums, 1, 1, self.num_nodes, 16),
        requires_grad=True)
    self.embeddings_generator = nn.ModuleList(
        [nn.Sequential(nn.Linear(16, self.d_model)) for _ in range(self.patch_nums)])
    self.intra_d_model = self.d_model
    self.intra_patch_attention = Intra_Patch_Attention(
        self.intra_d_model, factorized=factorized)
    # Two weight generators: one producing node-distinct weights
    # (memory-backed), one producing weights shared across nodes.
    self.weights_generator_distinct = WeightGenerator(
        self.intra_d_model, self.intra_d_model, mem_dim=16,
        num_nodes=num_nodes, factorized=factorized, number_of_weights=2)
    self.weights_generator_shared = WeightGenerator(
        self.intra_d_model, self.intra_d_model, mem_dim=None,
        num_nodes=num_nodes, factorized=False, number_of_weights=2)
    self.intra_Linear = nn.Linear(self.patch_nums, self.patch_nums * self.patch_size)

    # --- Inter-patch attention -------------------------------------
    # Each patch is flattened to a single token of width d_model * patch_size.
    self.stride = patch_size
    self.inter_d_model = self.d_model * self.patch_size
    self.emb_linear = nn.Linear(self.inter_d_model, self.inter_d_model)
    # Learnable positional encoding over the patch axis (zero-initialized).
    self.W_pos = positional_encoding(
        pe='zeros', learn_pe=True, q_len=self.patch_nums, d_model=self.inter_d_model)
    # NOTE(review): head count is set to d_model, which makes each
    # head's key/value dim equal to patch_size — matches the reference
    # code, but confirm this is intended rather than a typo.
    head_count = self.d_model
    key_dim = self.inter_d_model // head_count
    value_dim = self.inter_d_model // head_count
    self.inter_patch_attention = Inter_Patch_Attention(
        self.inter_d_model, self.inter_d_model, head_count, key_dim, value_dim,
        attn_dropout=0, proj_dropout=0.1, res_attention=False)

    # --- Normalization ---------------------------------------------
    # BatchNorm1d expects (batch, channels, length), so the feature
    # dim is transposed into the channel position and back.
    self.norm_attn = nn.Sequential(
        Transpose(1, 2), nn.BatchNorm1d(self.d_model), Transpose(1, 2))
    self.norm_ffn = nn.Sequential(
        Transpose(1, 2), nn.BatchNorm1d(self.d_model), Transpose(1, 2))

    # --- Position-wise feed-forward --------------------------------
    self.d_ff = d_ff
    self.dropout = nn.Dropout(0.1)
    self.ff = nn.Sequential(
        nn.Linear(self.d_model, self.d_ff, bias=True),
        nn.GELU(),
        nn.Dropout(0.2),
        nn.Linear(self.d_ff, self.d_model, bias=True))