def handler()

in comprehend.py [0:0]


# Per-request size cap applied to each page's text before it is sent to
# Comprehend. The value 5000 comes from the original code; it is enforced here
# on the UTF-8 encoded length.
# NOTE(review): confirm the current service limit against the Comprehend docs.
_MAX_TEXT_BYTES = 5000

# Minimum confidence (in percent) a key phrase or entity needs to be indexed.
_MIN_SCORE_PERCENT = 90.0

# Characters trimmed from both ends of extracted text, phrases and entities.
_STRIP_CHARS = '\t\n\r'


def _truncate_utf8(text, max_bytes):
    """Return ``text`` cut so that its UTF-8 encoding is at most ``max_bytes``."""
    encoded = text.encode('utf-8')
    if len(encoded) <= max_bytes:
        return text
    # errors='ignore' drops a multi-byte character that the cut split in half.
    return encoded[:max_bytes].decode('utf-8', errors='ignore')


def handler(event, context):
    """Index the key phrases and entities of an uploaded PDF in Elasticsearch.

    Reads the bucket name and object key from the first record of ``event``,
    downloads that object to ``/tmp``, extracts the text of every page with
    PyPDF2, and runs Comprehend key-phrase and entity detection on each page.
    Phrases and entities scoring at least ``_MIN_SCORE_PERCENT`` are collected
    and indexed, together with the page texts and a console link to the
    object, into the ``resume`` index.

    Args:
        event: Notification payload; ``event['Records'][0]['s3']`` must hold
            ``bucket.name`` and ``object.key`` (the key is URL-decoded with
            ``unquote_plus``).
        context: Invocation context; not used.

    Returns:
        The string ``'keyphrases Successfully Uploaded'`` on success.

    Raises:
        Exception: Any error raised while downloading, parsing, analysing or
            indexing is printed and re-raised unchanged.
    """
    print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    record = event['Records'][0]['s3']
    bucket = record['bucket']['name']
    key = unquote_plus(record['object']['key'])
    print("key is"+key)
    print("bucket is"+bucket)

    textvalues = []
    # Maps entity type -> entity text. A later entity of the same type
    # replaces an earlier one, exactly as the original update() call did.
    textvalues_entity = {}
    searchable_text = []

    # Name the local copy after the last path segment of the key instead of
    # the literal, never-formatted placeholder '/tmp/{}'.
    local_path = '/tmp/{}'.format(key.rsplit('/', 1)[-1])

    try:
        s3.Bucket(bucket).download_file(Key=key, Filename=local_path)
        print("Object downloaded")

        # The context manager closes the file even when a page fails to parse
        # or a Comprehend call raises.
        with open(local_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)
            num_pages = pdf_reader.numPages
            print("number of pages")
            print(num_pages)

            for page_index in range(num_pages):
                page = pdf_reader.getPage(page_index)
                print(page_index + 1)
                print("-------------iteration starts---------")

                # Truncate by encoded size (not sys.getsizeof, which measures
                # the Python object) and keep the stripped result.
                text = _truncate_utf8(
                    page.extractText(), _MAX_TEXT_BYTES
                ).strip(_STRIP_CHARS)
                searchable_text.append(text)
                print(text)

                if not text:
                    # Nothing to analyse on this page; sending an empty string
                    # to Comprehend would abort the whole document.
                    continue

                # Extracting Key Phrases
                key_phrase_response = comprehend.detect_key_phrases(
                    Text=text, LanguageCode='en'
                )
                for phrase in key_phrase_response.get("KeyPhrases"):
                    score = float(phrase.get("Score")) * 100
                    if score >= _MIN_SCORE_PERCENT:
                        textvalues.append(
                            phrase.get("Text").strip(_STRIP_CHARS)
                        )

                # Extracting Entities
                entity_response = comprehend.detect_entities(
                    Text=text, LanguageCode='en'
                )
                for entity in entity_response.get("Entities"):
                    score = float(entity.get("Score")) * 100
                    if score >= _MIN_SCORE_PERCENT:
                        entity_type = entity.get("Type").strip(_STRIP_CHARS)
                        textvalues_entity[entity_type] = (
                            entity.get("Text").strip(_STRIP_CHARS)
                        )

        # https://s3.console.aws.amazon.com/s3/object/%3Cbucket%3E/%3Ckey%3E?region=us-east-1
        s3url = 'https://s3.console.aws.amazon.com/s3/object/'+bucket+'/'+key+'?region=us-east-1'
        searchdata = {
            's3link': s3url,
            'KeyPhrases': textvalues,
            'Entity': textvalues_entity,
            'text': searchable_text,
        }
        print(searchdata)
        print("connecting to ES")
        es = connectES()
        es.index(index="resume", doc_type="_doc", body=searchdata)
        print("data uploaded to Elasticsearch")
        return 'keyphrases Successfully Uploaded'
    except Exception as e:
        print(e)
        print('Error: ')
        # Bare raise keeps the original traceback intact.
        raise