import os
import subprocess
import sys

import numpy as np


# Index of this machine, taken from the first command-line argument. It is
# used further down to pick which share of the remaining indices to process.
try:
    idx_machine = int(sys.argv[1])
except (IndexError, ValueError):
    # Missing or non-integer argument: exit with a usage message (status 1)
    # instead of an unexplained traceback.
    sys.exit(f"usage: {sys.argv[0]} <idx_machine>  (integer machine index)")

# Indices excluded from the remaining work (see IDX_REMAINING), written as
# inclusive (first, last) runs of consecutive values.
_DONE_RUNS = (
    (0, 18),
    (29, 37),
    (48, 56),
    (67, 75),
    (86, 94),
    (105, 113),
    (124, 132),
    (143, 151),
    (162, 170),
    (181, 189),
)
# Expand every run into its individual indices; the result is a plain set.
IDX_DONE = {idx for first, last in _DONE_RUNS for idx in range(first, last + 1)}
# Indices in [0, 200) that are not listed in IDX_DONE, in ascending order.
IDX_REMAINING = [idx for idx in range(200) if idx not in IDX_DONE]
# Number of machines the remaining indices are divided between.
NUM_MACHINES = 21
# Reject out-of-range machine indices explicitly. Without this check a
# negative value would silently select a share counted from the end, so two
# machines could end up processing the same indices.
if not 0 <= idx_machine < NUM_MACHINES:
    raise IndexError(f"idx_machine must be in [0, {NUM_MACHINES}), got {idx_machine}")
# np.array_split yields NUM_MACHINES contiguous chunks whose sizes differ by
# at most one; keep only this machine's chunk, converted to a plain list.
IDX = np.array_split(IDX_REMAINING, NUM_MACHINES)[idx_machine].tolist()


# Run the extraction script once per assigned index, each in its own process.
# The command is passed as an argument list, so no shell is involved.
# A non-zero exit status is reported on stderr and the loop moves on to the
# next index, so one failing index does not block the others.
# Note: if the `python3` executable cannot be found at all, subprocess.run
# raises FileNotFoundError and the script stops.
for idx in IDX:
    completed = subprocess.run(
        [
            "python3",
            "m4/sourcing/data_collection/callers/extract_html_get_image_urls.py",
            str(idx),
        ],
        check=False,
    )
    if completed.returncode != 0:
        print(
            f"extract_html_get_image_urls.py failed for index {idx} "
            f"(exit code {completed.returncode})",
            file=sys.stderr,
        )
