def lambda_handler()

in aws-kendra-transcribe-media-search/lambda/indexer/crawler.py


def lambda_handler(event, context):
    logger.info("Received event: %s" % json.dumps(event))
    
    # Handle Delete event from Cloudformation custom resource
    # In all other cases start crawler
    if (('RequestType' in event) and (event['RequestType'] == 'Delete')):
        logger.info("Cfn Delete event - no action - return Success")
        return exit_status(event, context, cfnresponse.SUCCESS)
    
    # exit if crawler is already running
    crawler_state = get_crawler_state(STACK_NAME)
    if crawler_state:
        logger.info(f"crawler sync state: {crawler_state}")
        if crawler_state == "RUNNING":
            logger.info("Previous crawler invocation is running. Exiting")
            return exit_status(event, context, cfnresponse.SUCCESS)
            
    # Make _category facetable if needed
    if MAKE_CATEGORY_FACETABLE == 'true':
        logger.info("Make _category facetable")
        make_category_facetable(indexId=INDEX_ID)
    # Start crawler, and set status in DynamoDB table
    logger.info("** Start crawler **")
    kendra_sync_job_id = start_kendra_sync_job(dsId=DS_ID, indexId=INDEX_ID)
    if kendra_sync_job_id is None:
        logger.info("Previous sync job still running. Exiting")
        return exit_status(event, context, cfnresponse.SUCCESS)
    put_crawler_state(STACK_NAME,'RUNNING')  
        
    # process S3 media objects
    s3files = []
    try:
        logger.info("** List and process S3 media objects **")
        [s3mediaobjects, s3metadataobjects, s3transcribeoptsobjects] = list_s3_objects(
            MEDIA_BUCKET, MEDIA_FOLDER_PREFIX, METADATA_FOLDER_PREFIX, TRANSCRIBEOPTS_FOLDER_PREFIX)
        for s3url in s3mediaobjects:
            process_s3_media_object(STACK_NAME, MEDIA_BUCKET, s3url, s3mediaobjects.get(s3url),
                                    s3metadataobjects.get(s3url), s3transcribeoptsobjects.get(s3url),
                                    kendra_sync_job_id, TRANSCRIBE_ROLE)
            s3files.append(s3url)
        # detect and delete indexed docs for files that are no longer in the source bucket location
        # reasons: file deleted, or indexer config updated to crawl a new location
        logger.info("** Process deletions **")
        process_deletions(DS_ID, INDEX_ID, kendra_sync_job_id=kendra_sync_job_id, s3files=s3files)
    except Exception as e:
        logger.error("Exception: " + str(e))
        put_crawler_state(STACK_NAME, 'STOPPED')            
        stop_kendra_sync_job_when_all_done(dsId=DS_ID, indexId=INDEX_ID)
        return exit_status(event, context, cfnresponse.FAILED)

    # Stop crawler
    logger.info("** Stop crawler **")
    put_crawler_state(STACK_NAME, 'STOPPED')
    
    # Stop media sync job if no new transcription jobs were started
    stop_kendra_sync_job_when_all_done(dsId=DS_ID, indexId=INDEX_ID)
    
    # All done
    return exit_status(event, context, cfnresponse.SUCCESS)
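
The helpers called above are defined elsewhere in crawler.py. exit_status lets the same handler serve both scheduled invocations and CloudFormation custom-resource calls. A minimal sketch, assuming the standard cfnresponse module; the ResponseURL check is an assumption about how the real helper distinguishes the two event types:

import cfnresponse

def exit_status(event, context, status):
    # Only CloudFormation custom-resource events carry a ResponseURL;
    # plain scheduled invocations just get the status returned.
    if 'ResponseURL' in event:
        cfnresponse.send(event, context, status, {})
    return status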
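
get_crawler_state and put_crawler_state persist a simple RUNNING/STOPPED flag so overlapping invocations can detect each other. A sketch backed by DynamoDB; the table name and key schema here are illustrative assumptions:

import boto3

dynamodb = boto3.resource('dynamodb')
state_table = dynamodb.Table('MediaSearchCrawlerState')  # hypothetical table name

def get_crawler_state(stack_name):
    # Returns the stored status string, or None if no state was ever recorded
    item = state_table.get_item(Key={'StackName': stack_name}).get('Item')
    return item['Status'] if item else None

def put_crawler_state(stack_name, status):
    state_table.put_item(Item={'StackName': stack_name, 'Status': status})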
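
make_category_facetable updates the search settings of Kendra's built-in _category attribute. A hedged sketch using the Kendra UpdateIndex API; the exact Search settings the repository applies may differ:

import boto3

kendra = boto3.client('kendra')

def make_category_facetable(indexId):
    # Mark the built-in _category attribute facetable (and keep it
    # searchable/displayable) via a metadata configuration update.
    kendra.update_index(
        Id=indexId,
        DocumentMetadataConfigurationUpdates=[{
            'Name': '_category',
            'Type': 'STRING_VALUE',
            'Search': {'Facetable': True, 'Searchable': True, 'Displayable': True}
        }])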
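
start_kendra_sync_job and stop_kendra_sync_job_when_all_done wrap the Kendra data source sync API. A sketch under assumptions; in particular, the real stop helper only ends the sync job once every Transcribe job started by this invocation has finished:

import boto3
from botocore.exceptions import ClientError

kendra = boto3.client('kendra')

def start_kendra_sync_job(dsId, indexId):
    # Returns the new sync job's ExecutionId, or None when Kendra reports
    # that a sync job is already in progress for this data source.
    try:
        return kendra.start_data_source_sync_job(Id=dsId, IndexId=indexId)['ExecutionId']
    except ClientError as e:
        if e.response['Error']['Code'] in ('ConflictException', 'ResourceInUseException'):
            return None
        raise

def stop_kendra_sync_job_when_all_done(dsId, indexId):
    # Illustrative only: the real helper first verifies no transcription
    # jobs are still pending before stopping the sync job.
    kendra.stop_data_source_sync_job(Id=dsId, IndexId=indexId)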
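
list_s3_objects gathers the media, metadata, and Transcribe-options objects under their respective prefixes. A simplified sketch using the list_objects_v2 paginator; note that the real helper must key the metadata and options dicts by the media object URL they describe (the handler looks them up with the media s3url), which this sketch does not attempt:

import boto3

s3 = boto3.client('s3')

def list_s3_objects(bucket, media_prefix, metadata_prefix, transcribeopts_prefix):
    def objects_under(prefix):
        # Map "s3://bucket/key" -> LastModified for every object under prefix
        found = {}
        for page in s3.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix):
            for obj in page.get('Contents', []):
                found[f"s3://{bucket}/{obj['Key']}"] = obj['LastModified']
        return found
    return [objects_under(media_prefix),
            objects_under(metadata_prefix),
            objects_under(transcribeopts_prefix)]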