in src/data_load/load.py [0:0]
def update_work_products_metadata(data, file_location_map, base_dir):
    reference_pattern = "{}:reference-data".format(
        config.get("CONNECTION", "data-partition-id"))
    master_pattern = "{}:master-data".format(
        config.get("CONNECTION", "data-partition-id"))
    # Rewrite the template "osdu" partition prefix to the configured data
    # partition, and swap the template surrogate keys for their final form.
    updated_manifest = json.dumps(data).replace(
        'osdu:reference-data', reference_pattern).replace(
        'osdu:master-data', master_pattern).replace(
        "surrogate-key:file-1", "surrogate-key:dataset--1:0:0").replace(
        "surrogate-key:wpc-1", "surrogate-key:wpc--1:0:0")
    data = json.loads(updated_manifest)
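    # For example, with data-partition-id set to "opendes" (a hypothetical
    # value, matching the example in the comment further below), a manifest
    # reference such as
    #   "osdu:master-data--Well:1234"
    # becomes
    #   "opendes:master-data--Well:1234"
    # after this round-trip through json.dumps/json.loads.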
logger.debug(f"Base directory is {base_dir}")
update_legal_and_acl_tags(data["WorkProduct"])
add_metadata(data["WorkProductComponents"])
add_metadata(data["Datasets"])
    with open(file_location_map) as file:
        location_map = json.load(file)

    file_name = data["WorkProduct"]["data"]["Name"]
    if file_name in location_map:
        file_source = location_map[file_name]["file_source"]
        file_id = location_map[file_name]["file_id"]
        file_version = location_map[file_name]["file_record_version"]
        # Update the Dataset with the generated file id and file source.
        data["Datasets"][0]["id"] = file_id
        data["Datasets"][0]["data"]["DatasetProperties"]["FileSourceInfo"]["FileSource"] = file_source
        del data["Datasets"][0]["data"]["DatasetProperties"]["FileSourceInfo"]["PreloadFilePath"]
        # Point the WorkProductComponent at the versioned file record id.
        data["WorkProductComponents"][0]["data"]["Datasets"][0] = (
            file_id + ":" + file_version)
        # TODO: remove this if not required later.
        if "id" not in data["WorkProduct"]:
            # Add the Work-Product id -> opendes:work-product--WorkProduct:load_document_69_D_CH_11_pdf.json
            data["WorkProduct"]["id"] = generate_workproduct_id(
                file_name, base_dir)
    else:
        logger.warning(f"File {file_name} does not exist in the location map")
    logger.debug(f"Work product data to upload:\n{data}")
    return data
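
# A minimal invocation sketch with hypothetical names and identifiers
# throughout. It assumes the module-level config exposes a CONNECTION section
# with data-partition-id set (e.g. "opendes") and that the helper functions
# used above (update_legal_and_acl_tags, add_metadata,
# generate_workproduct_id) behave as their names suggest; in the real loader,
# the manifest and the file-location map are produced by earlier steps.
if __name__ == "__main__":
    import tempfile

    sample_manifest = {
        "WorkProduct": {"data": {"Name": "69_D_CH_11"}},
        "WorkProductComponents": [
            {"data": {"Datasets": ["surrogate-key:file-1"]}}
        ],
        "Datasets": [{
            "id": "surrogate-key:file-1",
            "data": {"DatasetProperties": {"FileSourceInfo": {
                "FileSource": "",
                "PreloadFilePath": "/preload/69_D_CH_11.pdf",
            }}},
        }],
    }
    sample_map = {
        "69_D_CH_11": {
            "file_source": "/osdu-user/69_D_CH_11.pdf",
            "file_id": "opendes:dataset--File.Generic:1a2b3c",
            "file_record_version": "1",
        }
    }
    with tempfile.NamedTemporaryFile(
            "w", suffix=".json", delete=False) as tmp:
        json.dump(sample_map, tmp)
    updated = update_work_products_metadata(sample_manifest, tmp.name, ".")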