in pca-server/src/pca/pcakendrasearch.py [0:0]
def put_kendra_document(indexId, analysisUri, conversationAnalytics, text):
    """
    Index the prepared transcript in Kendra, setting all the document index attributes to support
    filtering, faceting, and search.

    Args:
        indexId: Kendra index id, passed to batch_put_document as IndexId.
        analysisUri: value stored in the ANALYSIS_URI attribute.
        conversationAnalytics: dict of call analytics. This function reads
            SourceInformation[0]["TranscribeJobInfo"] (MediaOriginalUri,
            TranscriptionJobName, MediaFileUri), ConversationTime, GUID, Agent,
            Duration and CustomEntities.
        text: transcript text, sent as the document Blob.

    Returns:
        Always True. A failure reported by Kendra in 'FailedDocuments' is only
        logged; it does not change the return value.

    Raises:
        KeyError / IndexError: if conversationAnalytics lacks any of the fields
            listed above (they are read with plain subscripting).
    """
    print(f"put_document(indexId={indexId}, analysisUri={analysisUri}, conversationAnalytics={conversationAnalytics}, text='{text[0:100]}...')")

    job_info = conversationAnalytics["SourceInformation"][0]["TranscribeJobInfo"]
    custom_entities = conversationAnalytics["CustomEntities"]

    # Each entity type becomes one "ENTITY_<TYPE>" string-list attribute.
    # Order is kept the same as the attribute order previously sent to Kendra.
    entity_types = (
        "PERSON",
        "LOCATION",
        "ORGANIZATION",
        "COMMERCIAL_ITEM",
        "EVENT",
        "DATE",
        "QUANTITY",
        "TITLE",
    )
    entity_attributes = [
        {
            "Key": "ENTITY_" + entity_type,
            "Value": {
                "StringListValue": get_entity_values(entity_type, custom_entities)
            },
        }
        for entity_type in entity_types
    ]

    scalar_attributes = [
        {
            "Key": "_source_uri",
            "Value": {"StringValue": get_http_from_s3_uri(job_info["MediaFileUri"])},
        },
        {
            "Key": "ANALYSIS_URI",
            "Value": {"StringValue": analysisUri},
        },
        {
            "Key": "DATETIME",
            "Value": {"DateValue": iso8601_datetime(conversationAnalytics["ConversationTime"])},
        },
        {
            "Key": "GUID",
            "Value": {"StringValue": conversationAnalytics["GUID"]},
        },
        {
            "Key": "AGENT",
            "Value": {"StringValue": conversationAnalytics["Agent"]},
        },
        {
            "Key": "DURATION",
            "Value": {"StringValue": durationBucket(conversationAnalytics["Duration"])},
        },
    ]

    document = {
        "Id": job_info["MediaOriginalUri"],
        "Title": job_info["TranscriptionJobName"],
        "Attributes": scalar_attributes + entity_attributes,
        "Blob": text,
    }
    documents = [document]

    # default=str because the DATETIME attribute may not be JSON-native; the
    # log line is truncated to keep large transcripts out of the logs.
    print("KENDRA.batch_put_document: " + json.dumps(documents, default=str)[0:1000] + "...")
    result = KENDRA.batch_put_document(
        IndexId=indexId,
        Documents=documents
    )

    failed_documents = result.get('FailedDocuments')
    if failed_documents:
        # Only one document is submitted, so the first failure entry is the
        # relevant one. str() guards against a missing/non-string ErrorMessage.
        print("ERROR: Failed to index document: " + str(failed_documents[0].get('ErrorMessage')))

    # default=str keeps this diagnostic log from raising on a non-JSON-native
    # value in the response after the request has already been made.
    print("result: " + json.dumps(result, default=str))
    return True