def __init__()

in detic/modeling/meta_arch/d2_deformable_detr.py [0:0]


    def __init__(self, cfg):
        super().__init__()
        self.with_image_labels = cfg.WITH_IMAGE_LABELS
        self.weak_weight = cfg.MODEL.DETR.WEAK_WEIGHT

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.test_topk = cfg.TEST.DETECTIONS_PER_IMAGE
        self.num_classes = cfg.MODEL.DETR.NUM_CLASSES
        self.mask_on = cfg.MODEL.MASK_ON
        hidden_dim = cfg.MODEL.DETR.HIDDEN_DIM
        num_queries = cfg.MODEL.DETR.NUM_OBJECT_QUERIES

        # Transformer parameters:
        nheads = cfg.MODEL.DETR.NHEADS
        dropout = cfg.MODEL.DETR.DROPOUT
        dim_feedforward = cfg.MODEL.DETR.DIM_FEEDFORWARD
        enc_layers = cfg.MODEL.DETR.ENC_LAYERS
        dec_layers = cfg.MODEL.DETR.DEC_LAYERS
        num_feature_levels = cfg.MODEL.DETR.NUM_FEATURE_LEVELS
        two_stage = cfg.MODEL.DETR.TWO_STAGE
        with_box_refine = cfg.MODEL.DETR.WITH_BOX_REFINE

        # Loss parameters:
        giou_weight = cfg.MODEL.DETR.GIOU_WEIGHT
        l1_weight = cfg.MODEL.DETR.L1_WEIGHT
        deep_supervision = cfg.MODEL.DETR.DEEP_SUPERVISION
        cls_weight = cfg.MODEL.DETR.CLS_WEIGHT
        focal_alpha = cfg.MODEL.DETR.FOCAL_ALPHA

        N_steps = hidden_dim // 2
        d2_backbone = MaskedBackbone(cfg)
        backbone = Joiner(d2_backbone, PositionEmbeddingSine(N_steps, normalize=True))

        transformer = DeformableTransformer(
            d_model=hidden_dim,
            nhead=nheads,
            num_encoder_layers=enc_layers,
            num_decoder_layers=dec_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation="relu",
            return_intermediate_dec=True,
            num_feature_levels=num_feature_levels,
            dec_n_points=4,
            enc_n_points=4,
            two_stage=two_stage,
            two_stage_num_proposals=num_queries)

        self.detr = DeformableDETR(
            backbone, transformer, num_classes=self.num_classes, 
            num_queries=num_queries,
            num_feature_levels=num_feature_levels,
            aux_loss=deep_supervision,
            with_box_refine=with_box_refine,
            two_stage=two_stage,
        )

        if self.mask_on:
            assert 0, 'Mask is not supported yet :('

        matcher = HungarianMatcher(
            cost_class=cls_weight, cost_bbox=l1_weight, cost_giou=giou_weight)
        weight_dict = {"loss_ce": cls_weight, "loss_bbox": l1_weight}
        weight_dict["loss_giou"] = giou_weight
        if deep_supervision:
            aux_weight_dict = {}
            for i in range(dec_layers - 1):
                aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()})
            weight_dict.update(aux_weight_dict)
        print('weight_dict', weight_dict)
        losses = ["labels", "boxes", "cardinality"]
        if self.mask_on:
            losses += ["masks"]
        self.criterion = CustomSetCriterion(
            self.num_classes, matcher=matcher, weight_dict=weight_dict, 
            focal_alpha=focal_alpha, 
            losses=losses,
            use_fed_loss=cfg.MODEL.DETR.USE_FED_LOSS
        )
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std