in aws-kendra-transcribe-media-search/lambda/indexer/crawler.py [0:0]
def lambda_handler(event, context):
    logger.info("Received event: %s", json.dumps(event))
    # Handle Delete event from CloudFormation custom resource.
    # In all other cases start the crawler.
    if 'RequestType' in event and event['RequestType'] == 'Delete':
        logger.info("Cfn Delete event - no action - return Success")
        return exit_status(event, context, cfnresponse.SUCCESS)
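    # Note: for custom-resource invocations, exit_status is expected to call
    # cfnresponse.send(); without a response, CloudFormation waits for the
    # stack operation to time out. See the hedged helper sketch below.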
    # Exit if the crawler is already running
    crawler_state = get_crawler_state(STACK_NAME)
    if crawler_state:
        logger.info(f"crawler sync state: {crawler_state}")
        if crawler_state == "RUNNING":
            logger.info("Previous crawler invocation is running. Exiting")
            return exit_status(event, context, cfnresponse.SUCCESS)
    # Make _category facetable if needed
    if MAKE_CATEGORY_FACETABLE == 'true':
        logger.info("Make _category facetable")
        make_category_facetable(indexId=INDEX_ID)
    # Start crawler, and set status in DynamoDB table
    logger.info("** Start crawler **")
    kendra_sync_job_id = start_kendra_sync_job(dsId=DS_ID, indexId=INDEX_ID)
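    # start_kendra_sync_job presumably wraps kendra.start_data_source_sync_job,
    # which returns an ExecutionId used to group this run's document updates;
    # a None return signals a sync job already in progress for the data source.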
    if kendra_sync_job_id is None:
        logger.info("Previous sync job still running. Exiting")
        return exit_status(event, context, cfnresponse.SUCCESS)
    put_crawler_state(STACK_NAME, 'RUNNING')
    # Process S3 media objects
    s3files = []
    try:
        logger.info("** List and process S3 media objects **")
        [s3mediaobjects, s3metadataobjects, s3transcribeoptsobjects] = list_s3_objects(
            MEDIA_BUCKET, MEDIA_FOLDER_PREFIX, METADATA_FOLDER_PREFIX, TRANSCRIBEOPTS_FOLDER_PREFIX)
        for s3url in s3mediaobjects.keys():
            process_s3_media_object(STACK_NAME, MEDIA_BUCKET, s3url,
                                    s3mediaobjects.get(s3url),
                                    s3metadataobjects.get(s3url),
                                    s3transcribeoptsobjects.get(s3url),
                                    kendra_sync_job_id, TRANSCRIBE_ROLE)
            s3files.append(s3url)
        # Detect and delete indexed docs whose source files are no longer in the
        # bucket location (file deleted, or indexer config updated to crawl a
        # new location).
        logger.info("** Process deletions **")
        process_deletions(DS_ID, INDEX_ID, kendra_sync_job_id=kendra_sync_job_id, s3files=s3files)
    except Exception as e:
        logger.error("Exception: %s", e)
        put_crawler_state(STACK_NAME, 'STOPPED')
        stop_kendra_sync_job_when_all_done(dsId=DS_ID, indexId=INDEX_ID)
        return exit_status(event, context, cfnresponse.FAILED)
    # Stop crawler
    logger.info("** Stop crawler **")
    put_crawler_state(STACK_NAME, 'STOPPED')
    # Stop the media sync job if no new transcription jobs were started
    stop_kendra_sync_job_when_all_done(dsId=DS_ID, indexId=INDEX_ID)
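    # If transcription jobs are still in flight, the sync job presumably stays
    # open and is stopped later by the handler that processes transcription
    # completion events.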
    # All done
    return exit_status(event, context, cfnresponse.SUCCESS)
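
# ----------------------------------------------------------------------------
# Hedged sketch, not part of the original file: one plausible shape for the
# helpers referenced above. The table name, item schema, and Search flags are
# illustrative assumptions, not the project's actual implementation.
# ----------------------------------------------------------------------------
import os

import boto3

# Assumption: crawler status is kept as a single DynamoDB item per stack, in a
# table named by the (hypothetical) CRAWLER_STATE_TABLE environment variable.
dynamodb_table = boto3.resource('dynamodb').Table(os.environ['CRAWLER_STATE_TABLE'])
kendra = boto3.client('kendra')

def exit_status(event, context, status):
    # Custom-resource invocations carry a RequestType; CloudFormation blocks
    # until cfnresponse.send() delivers a status, so send one in that case.
    if 'RequestType' in event:
        cfnresponse.send(event, context, status, {})
    return status

def get_crawler_state(stack_name):
    # Return the stored state ('RUNNING'/'STOPPED'), or None if no item exists.
    item = dynamodb_table.get_item(Key={'id': stack_name}).get('Item')
    return item['crawler_state'] if item else None

def put_crawler_state(stack_name, state):
    dynamodb_table.put_item(Item={'id': stack_name, 'crawler_state': state})

def make_category_facetable(indexId):
    # UpdateIndex flips the Facetable flag on the built-in _category attribute;
    # the other Search flags shown here are assumptions.
    kendra.update_index(
        Id=indexId,
        DocumentMetadataConfigurationUpdates=[{
            'Name': '_category',
            'Type': 'STRING_VALUE',
            'Search': {
                'Facetable': True,
                'Searchable': True,
                'Displayable': True,
                'Sortable': False
            }
        }]
    )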