in lambda/indexer/jobcomplete.py [0:0]
def get_document(dsId, indexId, s3url, item, text):
    """Assemble the document dict for one S3 object and overlay its metadata.

    The base document uses ``s3url`` as ``Id``, the file name parsed from
    the URL as ``Title``, ``text`` as ``Blob``, and three string attributes:
    the data source id, the sync job execution id and an https S3 URI built
    from the bucket's region, the bucket and the key.

    Metadata loaded from ``item['metadata_url']`` is then merged in: a
    truthy ``Title`` replaces the default title, ``Attributes`` are appended
    after the built-in ones, and ``AccessControlList`` is copied over.
    ``DocumentId`` and ``ContentType`` entries are only reported through
    ``logger.error``; they never change the document.

    Args:
        dsId: Value stored in the ``_data_source_id`` attribute.
        indexId: Not used by this function.
        s3url: S3 URL of the object; becomes the document ``Id``.
        item: Mapping that provides ``sync_job_id`` and ``metadata_url``.
        text: Content stored under ``Blob``.

    Returns:
        The assembled document dict.
    """
    bucket, key, file_name = parse_s3url(s3url)
    region = get_bucket_region(bucket)

    # Built-in attributes, in the order they appear in the document.
    # NOTE(review): the underscore-prefixed keys look like reserved Amazon
    # Kendra attribute names - confirm against the index configuration.
    builtin_attributes = (
        ("_data_source_id", dsId),
        ("_data_source_sync_job_execution_id", item['sync_job_id']),
        ("_source_uri", f"https://s3.{region}.amazonaws.com/{bucket}/{key}"),
    )
    document = {
        "Id": s3url,
        "Title": file_name,
        "Attributes": [
            {"Key": name, "Value": {"StringValue": value}}
            for name, value in builtin_attributes
        ],
        "Blob": text,
    }

    # merge metadata
    metadata = get_s3jsondata(item['metadata_url'])

    # These fields are fixed by the indexer; a metadata value is reported
    # and otherwise ignored.
    for protected_field in ("DocumentId", "ContentType"):
        if metadata.get(protected_field):
            logger.error(f"Metadata may not override: {protected_field}")

    title_override = metadata.get("Title")
    if title_override:
        logger.info(f"Set 'Title' to: \"{title_override}\"")
        document['Title'] = title_override

    if metadata.get("Attributes"):
        logger.info("Set 'Attributes'")
        # Extra attributes go after the built-in ones, in place.
        document["Attributes"].extend(get_metadata_attributes(metadata))

    acl = metadata.get("AccessControlList")
    if acl:
        logger.info("Set 'AccessControlList'")
        document["AccessControlList"] = acl

    return document