def convert_d_fine_checkpoint()

in src/transformers/models/d_fine/convert_d_fine_original_pytorch_checkpoint_to_hf.py
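
A quick way to exercise the converter is to call it directly from Python. The import path follows the file location above; the argument values below are purely illustrative:

    from transformers.models.d_fine.convert_d_fine_original_pytorch_checkpoint_to_hf import (
        convert_d_fine_checkpoint,
    )

    convert_d_fine_checkpoint(
        model_name="dfine_x_coco",  # any name handled by the if/elif table below
        pytorch_dump_folder_path="./dfine_x_coco",  # hypothetical output folder
        push_to_hub=False,
        repo_id="Peterande/D-FINE",  # hypothetical Hub repo holding the original weights
    )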


def convert_d_fine_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, repo_id):
    """
    Copy/paste/tweak the original checkpoint's weights into our D-FINE structure.
    """

    # load default config
    config = get_d_fine_config(model_name)
    state_dict = load_original_state_dict(repo_id, model_name)
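    # drop buffers that were precomputed and cached in the original checkpoint
    # (the anchor grid and its validity mask); the HF model regenerates them
    # at runtime, so loading them would only raise unexpected-key errors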
    state_dict.pop("decoder.valid_mask", None)
    state_dict.pop("decoder.anchors", None)
    model = DFineForObjectDetection(config)
    logger.info(f"Converting model {model_name}...")

    state_dict = convert_old_keys_to_new_keys(state_dict)
    state_dict.pop("decoder.model.decoder.up", None)
    state_dict.pop("decoder.model.decoder.reg_scale", None)

    # query, key and value matrices need special treatment: read_in_q_k_v splits
    # the original fused in_proj parameters into separate q/k/v projections
    read_in_q_k_v(state_dict, config, model_name)
    # important: the decoder's bbox/class prediction heads are shared with the
    # head model's top-level attributes, so their keys are duplicated without
    # the "model.decoder." prefix (BatchNorm bookkeeping buffers are dropped)
    for key in list(state_dict.keys()):
        if key.endswith("num_batches_tracked"):
            del state_dict[key]
        # for two_stage
        if "bbox_embed" in key or ("class_embed" in key and "denoising_" not in key):
            state_dict[key.split("model.decoder.")[-1]] = state_dict[key]

    # finally, create HuggingFace model and load state dict
    model.load_state_dict(state_dict)
    model.eval()

    # load image processor (D-FINE reuses the RT-DETR image processor)
    image_processor = RTDetrImageProcessor()

    # prepare image
    img = prepare_img()

    # preprocess image
    transformations = transforms.Compose(
        [
            transforms.Resize([640, 640], interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.ToTensor(),
        ]
    )
    original_pixel_values = transformations(img).unsqueeze(0)  # insert batch dimension

    encoding = image_processor(images=img, return_tensors="pt")
    pixel_values = encoding["pixel_values"]
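
    # both pipelines resize to 640x640 and rescale to [0, 1] without
    # normalization, so the two tensors should match exactly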

    assert torch.allclose(original_pixel_values, pixel_values)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    pixel_values = pixel_values.to(device)

    with torch.no_grad():
        outputs = model(pixel_values)
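    # spot-check a 3x3 slice of the logits and predicted boxes against
    # reference values recorded for each supported checkpoint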

    if model_name == "dfine_x_coco":
        expected_slice_logits = torch.tensor(
            [
                [-4.844723, -4.7293096, -4.5971327],
                [-4.554266, -4.61723, -4.627926],
                [-4.3934402, -4.6064143, -4.139952],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.2565248, 0.5477609, 0.47644863],
                [0.7690029, 0.41423926, 0.46148556],
                [0.1688096, 0.19923759, 0.21118002],
            ]
        )
    elif model_name == "dfine_x_obj2coco":
        expected_slice_logits = torch.tensor(
            [
                [-4.230433, -6.6295037, -4.8339615],
                [-4.085411, -6.3280816, -4.695468],
                [-3.8968022, -6.336813, -4.67051],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.25707328, 0.54842496, 0.47624254],
                [0.76967394, 0.41272867, 0.45970756],
                [0.16882066, 0.19918433, 0.2112098],
            ]
        )
    elif model_name == "dfine_x_obj365":
        expected_slice_logits = torch.tensor(
            [
                [-6.3844957, -3.7549126, -4.6873264],
                [-5.8433194, -3.4490552, -3.3228905],
                [-6.5314736, -3.7856622, -4.895984],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.7703046, 0.41329497, 0.45932162],
                [0.16898105, 0.19876392, 0.21050783],
                [0.25134972, 0.5517619, 0.4864124],
            ]
        )
    elif model_name == "dfine_m_coco":
        expected_slice_logits = torch.tensor(
            [
                [-4.5187078, -4.71708, -4.117749],
                [-4.513984, -4.937715, -3.829125],
                [-4.830042, -6.931682, -3.1740026],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.25851426, 0.5489963, 0.4757598],
                [0.769683, 0.41411665, 0.45988125],
                [0.16866133, 0.19921188, 0.21207744],
            ]
        )
    elif model_name == "dfine_m_obj2coco":
        expected_slice_logits = torch.tensor(
            [
                [-4.520666, -7.6678333, -5.739887],
                [-4.5053635, -7.510611, -5.452532],
                [-4.70348, -5.6098466, -5.0199957],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.2567608, 0.5485795, 0.4767465],
                [0.77035284, 0.41236404, 0.4580645],
                [0.5498525, 0.27548885, 0.05886984],
            ]
        )
    elif model_name == "dfine_m_obj365":
        expected_slice_logits = torch.tensor(
            [
                [-5.770525, -3.1610885, -5.2807794],
                [-5.7809954, -3.768266, -5.1146393],
                [-6.180705, -3.7357295, -3.1651964],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.2529114, 0.5526663, 0.48270613],
                [0.7712474, 0.41294736, 0.457174],
                [0.5497157, 0.27588123, 0.05813372],
            ]
        )
    elif model_name == "dfine_l_coco":
        expected_slice_logits = torch.tensor(
            [
                [-4.068779, -5.169955, -4.339212],
                [-3.9461594, -5.0279613, -4.0161457],
                [-4.218292, -6.196324, -5.175245],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.2564867, 0.5489948, 0.4748876],
                [0.7693534, 0.4138953, 0.4598034],
                [0.16875696, 0.19875404, 0.21196914],
            ]
        )
    elif model_name == "dfine_l_obj365":
        expected_slice_logits = torch.tensor(
            [
                [-5.7953215, -3.4901116, -5.4394145],
                [-5.7032104, -3.671125, -5.76121],
                [-6.09466, -3.1512096, -4.285499],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.7693825, 0.41265628, 0.4606362],
                [0.25306237, 0.55187637, 0.4832178],
                [0.16892478, 0.19880727, 0.21115331],
            ]
        )
    elif model_name == "dfine_l_obj2coco_e25":
        expected_slice_logits = torch.tensor(
            [
                [-3.6098495, -6.633563, -5.1227236],
                [-3.682696, -6.9178205, -5.414557],
                [-4.491674, -6.0823426, -4.5718226],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.7697078, 0.41368833, 0.45879585],
                [0.2573691, 0.54856044, 0.47715297],
                [0.16895264, 0.19871138, 0.2115552],
            ]
        )
    elif model_name == "dfine_n_coco":
        expected_slice_logits = torch.tensor(
            [
                [-3.7827945, -5.0889463, -4.8341026],
                [-5.3046904, -6.2801714, -2.9276395],
                [-4.497901, -5.2670407, -6.2380104],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.73334837, 0.4270624, 0.39424777],
                [0.1680235, 0.1988639, 0.21031213],
                [0.25370035, 0.5534435, 0.48496848],
            ]
        )
    elif model_name == "dfine_s_coco":
        expected_slice_logits = torch.tensor(
            [
                [-3.8097816, -4.7724586, -5.994499],
                [-5.2974715, -9.499067, -6.1653666],
                [-5.3502765, -3.9530406, -6.3630295],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.7677696, 0.41479152, 0.46441072],
                [0.16912134, 0.19869131, 0.2123824],
                [0.2581653, 0.54818195, 0.47512347],
            ]
        )
    elif model_name == "dfine_s_obj2coco":
        expected_slice_logits = torch.tensor(
            [
                [-6.0208125, -7.532673, -5.0572147],
                [-3.3595953, -9.057545, -6.376975],
                [-4.3203554, -9.546032, -6.075504],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.16901012, 0.19883151, 0.21121952],
                [0.76784194, 0.41266578, 0.46402973],
                [0.2563128, 0.54797643, 0.47937632],
            ]
        )
    elif model_name == "dfine_s_obj365":
        expected_slice_logits = torch.tensor(
            [
                [-6.3807316, -4.320986, -6.4775343],
                [-6.5818424, -3.5009093, -5.75824],
                [-5.748005, -4.3228016, -4.003726],
            ]
        )
        expected_slice_boxes = torch.tensor(
            [
                [0.2532072, 0.5491191, 0.48222217],
                [0.76586807, 0.41175705, 0.46789962],
                [0.169111, 0.19844547, 0.21069047],
            ]
        )
    else:
        raise ValueError(f"Unknown D-FINE model name: {model_name}")

    assert torch.allclose(outputs.logits[0, :3, :3], expected_slice_logits.to(outputs.logits.device), atol=1e-3)
    assert torch.allclose(outputs.pred_boxes[0, :3, :3], expected_slice_boxes.to(outputs.pred_boxes.device), atol=1e-4)
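
    # Illustrative extra step, not in the original script: decode the raw
    # outputs into thresholded detections with the processor's built-in
    # post-processing (target_sizes expects (height, width) per image).
    detections = image_processor.post_process_object_detection(
        outputs, target_sizes=torch.tensor([img.size[::-1]]), threshold=0.5
    )[0]
    print(f"{len(detections['scores'])} detections above threshold 0.5")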

    if pytorch_dump_folder_path is not None:
        Path(pytorch_dump_folder_path).mkdir(parents=True, exist_ok=True)
        print(f"Saving model {model_name} to {pytorch_dump_folder_path}")
        model.save_pretrained(pytorch_dump_folder_path)
        print(f"Saving image processor to {pytorch_dump_folder_path}")
        image_processor.save_pretrained(pytorch_dump_folder_path)

    if push_to_hub:
        # Upload model, image processor and config to the hub
        logger.info("Uploading PyTorch model and image processor to the hub...")
        config.push_to_hub(
            repo_id=repo_id,
            commit_message="Add config from convert_d_fine_original_pytorch_checkpoint_to_hf.py",
        )
        model.push_to_hub(
            repo_id=repo_id,
            commit_message="Add model from convert_d_fine_original_pytorch_checkpoint_to_hf.py",
        )
        image_processor.push_to_hub(
            repo_id=repo_id,
            commit_message="Add image processor from convert_d_fine_original_pytorch_checkpoint_to_hf.py",
        )
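
Once saved, the converted checkpoint round-trips through the standard from_pretrained APIs. A minimal sketch, assuming the hypothetical dump folder from the usage example above:

    from transformers import AutoImageProcessor, DFineForObjectDetection

    model = DFineForObjectDetection.from_pretrained("./dfine_x_coco")  # hypothetical path
    image_processor = AutoImageProcessor.from_pretrained("./dfine_x_coco")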