in code/document_classifier/document_classifier.py [0:0]
def processRequest(newImage):
    """Decide whether a document item should go to the NLP processing pipeline.

    Args:
        newImage: Mapping describing the document item. The keys read here are
            "documentMetadata", "documentId", "bucketName" and "documentName".

    Returns:
        Whatever ``startNLPProcessing`` returns when the document's metadata
        class is listed in ``documentTypes['NLP_VALID']``; otherwise a dict
        with ``statusCode`` 200 and a message saying the document is not
        eligible for NLP processing.

    Raises:
        ValueError: If any of the four required fields is missing or falsy.
    """
    documentMetadata = newImage.get("documentMetadata")
    documentId = newImage.get("documentId")
    bucketName = newImage.get("bucketName")
    objectName = newImage.get("documentName")

    # Guard clause: all four fields must be present and truthy.
    if not (documentMetadata and documentId and bucketName and objectName):
        raise ValueError("Invalid document item! Please check the incoming dynamoDB record stream")

    print("Valid document item to classify!")
    print("DocumentId: {}, BucketName: {}, ObjectName: {}".format(documentId, bucketName, objectName))

    # This is the logic that determines whether or not the document should be
    # sent to the NLP processing pipeline. It could be anything; basing it on
    # the document metadata was simply easy to implement.
    print(documentMetadata)

    try:
        documentClass = documentMetadata['class']
    except (KeyError, TypeError):
        # Metadata without a 'class' entry (or metadata that is not a mapping)
        # cannot be matched against the NLP-eligible classes, so the document
        # is reported as not eligible instead of failing with an unrelated
        # KeyError/TypeError.
        print("Document {} has no readable 'class' in its metadata".format(documentId))
        isEligible = False
    else:
        isEligible = documentClass in documentTypes['NLP_VALID']

    if isEligible:
        return startNLPProcessing(bucketName, objectName, documentId)

    return {
        'statusCode': 200,
        'message': "Document {} not eligible for NLP Processing".format(documentId)
    }