# put_kendra_document() -- defined in pca-server/src/pca/pcakendrasearch.py [0:0]


def put_kendra_document(indexId, analysisUri, conversationAnalytics, text):
    """
    Submit one prepared transcript to a Kendra index as a single document.

    The document Id and Title are read from the first entry of
    conversationAnalytics["SourceInformation"] (its "TranscribeJobInfo"
    record), the body ("Blob") is `text`, and a fixed set of index attributes
    (source URI, analysis URI, datetime, GUID, agent, duration bucket, and one
    string-list attribute per entity type) is attached for filtering,
    faceting, and search.

    Arguments:
        indexId: passed to KENDRA.batch_put_document as IndexId.
        analysisUri: stored verbatim in the ANALYSIS_URI attribute.
        conversationAnalytics: mapping read for "SourceInformation",
            "ConversationTime", "GUID", "Agent", "Duration" and
            "CustomEntities".
        text: the transcript text used as the document Blob.

    Returns:
        True, always. A failed document is only reported via print(); it does
        not change the return value.
    """
    print(f"put_document(indexId={indexId}, analysisUri={analysisUri}, conversationAnalytics={conversationAnalytics}, text='{text[0:100]}...')")

    # Everything identifying the media file lives under the first source entry.
    job_info = conversationAnalytics["SourceInformation"][0]["TranscribeJobInfo"]
    doc_id = job_info["MediaOriginalUri"]
    doc_title = job_info["TranscriptionJobName"]

    # Single-valued attributes, built in the same order they are indexed.
    attributes = [
        {"Key": "_source_uri", "Value": {"StringValue": get_http_from_s3_uri(job_info["MediaFileUri"])}},
        {"Key": "ANALYSIS_URI", "Value": {"StringValue": analysisUri}},
        {"Key": "DATETIME", "Value": {"DateValue": iso8601_datetime(conversationAnalytics["ConversationTime"])}},
        {"Key": "GUID", "Value": {"StringValue": conversationAnalytics["GUID"]}},
        {"Key": "AGENT", "Value": {"StringValue": conversationAnalytics["Agent"]}},
        {"Key": "DURATION", "Value": {"StringValue": durationBucket(conversationAnalytics["Duration"])}},
    ]

    # One string-list attribute per entity type; the attribute key is the
    # entity type prefixed with "ENTITY_".
    entity_types = (
        "PERSON",
        "LOCATION",
        "ORGANIZATION",
        "COMMERCIAL_ITEM",
        "EVENT",
        "DATE",
        "QUANTITY",
        "TITLE",
    )
    for entity_type in entity_types:
        entity_values = get_entity_values(entity_type, conversationAnalytics["CustomEntities"])
        attributes.append({
            "Key": "ENTITY_" + entity_type,
            "Value": {"StringListValue": entity_values},
        })

    documents = [{
        "Id": doc_id,
        "Title": doc_title,
        "Attributes": attributes,
        "Blob": text,
    }]

    # Log a truncated dump of the request; default=str covers values that
    # json cannot serialize natively.
    print("KENDRA.batch_put_document: " + json.dumps(documents, default=str)[0:1000] + "...")
    result = KENDRA.batch_put_document(IndexId=indexId, Documents=documents)

    # Only the first failure is reported; the response is logged in full below.
    failures = result['FailedDocuments'] if 'FailedDocuments' in result else []
    if len(failures) > 0:
        print("ERROR: Failed to index document: " + failures[0]['ErrorMessage'])
    print("result: " + json.dumps(result))
    return True