def verify_references()

in src/data_load/load.py [0:0]


def verify_references(dir_name, batch_size=20, ingestion_sequence=""):
    """Verify, in batches, the reference-data records named by an ingestion sequence.

    The ingestion sequence is a JSON file containing a list of entries, each
    carrying a ``"FileName"`` key. Every named file is resolved relative to
    ``dir_name``, loaded as JSON, and its ``"ReferenceData"`` list is scanned.
    For each item that has an ``"id"`` key, ``reference_data_id(item)`` is
    collected; the collected ids are handed to ``verify_ids`` in groups of at
    most ``batch_size``.

    Args:
        dir_name: Directory that the ``"FileName"`` values are joined onto.
        batch_size: Number of ids passed to ``verify_ids`` per call.
        ingestion_sequence: Path of the JSON file listing the entries to check.

    Returns:
        A ``(success, failed)`` tuple of lists, the concatenation of the two
        values returned by each ``verify_ids`` call (their element type is
        whatever ``verify_ids`` produces -- not visible from this function).

    Raises:
        OSError: If the sequence file or a listed JSON file cannot be opened.
        json.JSONDecodeError: If one of those files is not valid JSON.
        KeyError: If a listed JSON file has no ``"ReferenceData"`` key.
    """
    success = []
    failed = []

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(ingestion_sequence, encoding="utf-8") as file:
        sequence = json.load(file)

    for entry in sequence:
        file_name = entry.get("FileName")
        filepath_normalized = os.path.normpath(os.path.join(dir_name, file_name))
        logger.debug(f"Verifying file: {filepath_normalized}")

        if not filepath_normalized.endswith(".json"):
            # Skip this entry only. Returning here would discard the results
            # gathered so far and hand callers None instead of the
            # (success, failed) tuple they unpack.
            logger.warning(f"Skipping non-JSON file: {filepath_normalized}")
            continue

        with open(filepath_normalized, encoding="utf-8") as file:
            ingested_data = json.load(file)["ReferenceData"]

        record_ids = []
        for ingested_datum in ingested_data:
            if "id" not in ingested_datum:
                continue
            record_ids.append(reference_data_id(ingested_datum))

            # Flush only right after an id was added, so an empty batch is
            # never sent (matters when batch_size <= 0).
            if len(record_ids) >= batch_size:
                logger.debug(
                    f"Searching records with batch size {len(record_ids)}")
                s, f = verify_ids(record_ids)
                success += s
                failed += f
                record_ids = []

        # Verify whatever is left over from the last, partially filled batch.
        if record_ids:
            logger.debug(
                f"Searching remaining records with batch size {len(record_ids)}")
            s, f = verify_ids(record_ids)
            success += s
            failed += f

    return success, failed