in src/data_load/load.py [0:0]
def verify_references(dir_name, batch_size=20, ingestion_sequence=""):
    """Verify the reference-data records listed in an ingestion sequence.

    Loads the JSON document at ``ingestion_sequence`` and iterates its
    entries. For each entry, the file named by its ``"FileName"`` key is
    resolved against ``dir_name`` and loaded; every item under that file's
    ``"ReferenceData"`` key that contains an ``"id"`` is converted with
    ``reference_data_id`` and the resulting ids are handed to ``verify_ids``
    in groups of at most ``batch_size``.

    Entries whose normalized path does not end in ``.json`` are skipped with
    a warning. (Previously such an entry made the function return ``None``,
    dropping all results gathered so far and breaking callers that unpack
    the ``(success, failed)`` tuple.)

    Args:
        dir_name: Directory that each entry's ``"FileName"`` is joined onto.
        batch_size: Number of record ids collected before ``verify_ids`` is
            called.
        ingestion_sequence: Path to the JSON ingestion-sequence file.

    Returns:
        A ``(success, failed)`` tuple of lists, accumulated from the pairs
        returned by every ``verify_ids`` call.

    Raises:
        KeyError: If a data file has no ``"ReferenceData"`` key.
        OSError: If the sequence file or a data file cannot be opened.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    success = []
    failed = []

    # Read the whole sequence up front so the handle is released before the
    # (potentially long) verification work starts.
    with open(ingestion_sequence) as sequence_file:
        sequence = json.load(sequence_file)

    for entry in sequence:
        filepath_normalized = os.path.normpath(
            os.path.join(dir_name, entry.get("FileName")))
        logger.debug(f"Verifying file: {filepath_normalized}")

        if not filepath_normalized.endswith(".json"):
            # Skip rather than abort: one non-JSON entry must not discard the
            # results already collected or change the return type.
            logger.warning(f"Skipping non-JSON file: {filepath_normalized}")
            continue

        with open(filepath_normalized) as data_file:
            ingested_data = json.load(data_file)["ReferenceData"]

        record_ids = []
        for ingested_datum in ingested_data:
            if "id" not in ingested_datum:
                continue
            record_ids.append(reference_data_id(ingested_datum))

            if len(record_ids) >= batch_size:
                logger.debug(
                    f"Searching records with batch size {len(record_ids)}")
                s, f = verify_ids(record_ids)
                success += s
                failed += f
                record_ids = []

        # Flush the final, partially filled batch for this file.
        if record_ids:
            logger.debug(
                f"Searching remaining records with batch size {len(record_ids)}")
            s, f = verify_ids(record_ids)
            success += s
            failed += f

    return success, failed