in textract-pipeline/lambda/asyncprocessor/lambda_function.py [0:0]
def startJob(bucketName, objectName, documentId, snsTopic, snsRole, detectForms, detectTables):
    """Start a Textract job for an S3 object and return the job id.

    When neither ``detectForms`` nor ``detectTables`` is truthy, the job is
    started with ``start_document_text_detection``. Otherwise it is started
    with ``start_document_analysis``, requesting "TABLES" and/or "FORMS"
    (in that order) according to the flags.

    Args:
        bucketName: S3 bucket holding the document.
        objectName: S3 object name of the document.
        documentId: Sent as both ``ClientRequestToken`` and ``JobTag``.
        snsTopic: Value passed as ``SNSTopicArn`` in the notification channel.
        snsRole: Value passed as ``RoleArn`` in the notification channel.
        detectForms: Truthy to request the "FORMS" feature.
        detectTables: Truthy to request the "TABLES" feature.

    Returns:
        The ``"JobId"`` entry of the Textract response.
    """
    print("Starting job with documentId: {}, bucketName: {}, objectName: {}".format(documentId, bucketName, objectName))

    textract = AwsHelper().getClient('textract')

    # Arguments shared by both Textract start calls.
    commonArgs = {
        "ClientRequestToken": documentId,
        "DocumentLocation": {
            "S3Object": {"Bucket": bucketName, "Name": objectName},
        },
        "NotificationChannel": {"RoleArn": snsRole, "SNSTopicArn": snsTopic},
        "JobTag": documentId,
    }

    # Keep the TABLES-before-FORMS ordering of the requested features.
    requestedFeatures = [
        feature
        for feature, enabled in (("TABLES", detectTables), ("FORMS", detectForms))
        if enabled
    ]

    # An empty feature list means plain text detection is enough.
    if requestedFeatures:
        jobInfo = textract.start_document_analysis(FeatureTypes=requestedFeatures, **commonArgs)
    else:
        jobInfo = textract.start_document_text_detection(**commonArgs)

    return jobInfo["JobId"]