in src/tdp.py [0:0]
def __init__(self, bucketName, documentPath, awsRegion, detectText, detectForms, detectTables):
    """Build and validate the input parameters for a processing run.

    A fresh ``Input`` object is created and each truthy argument is copied
    onto the attribute of the same name; falsy arguments leave whatever
    value ``Input()`` initialised. The document location and type are then
    derived and checked, and the resulting object is stored on
    ``self.inputParameters``.

    Args:
        bucketName: Bucket holding the document; when set (truthy) the
            document is treated as non-local.
        documentPath: Path of the document; its file extension decides
            the document type.
        awsRegion: Region value copied onto the parameters when truthy.
        detectText: Flag copied onto the parameters when truthy.
        detectForms: Flag copied onto the parameters when truthy.
        detectTables: Flag copied onto the parameters when truthy.

    Raises:
        Exception: If neither a bucket name nor a document path is set,
            if the extension is not pdf/jpg/jpeg/png, if a PDF is given
            without a bucket, or if all three detect flags equal False.
    """
    params = Input()

    # Only arguments that were actually supplied (truthy) override the
    # values Input() starts with.
    supplied = (
        ("bucketName", bucketName),
        ("documentPath", documentPath),
        ("awsRegion", awsRegion),
        ("detectText", detectText),
        ("detectForms", detectForms),
        ("detectTables", detectTables),
    )
    for attrName, value in supplied:
        if value:
            setattr(params, attrName, value)

    if not (params.bucketName or params.documentPath):
        raise Exception("Document is required.")

    # No bucket name means the document is read from the local path.
    params.isLocalDocument = not params.bucketName

    # NOTE(review): the comparisons below assume getFileExtenstion returns
    # the extension without a leading dot -- confirm against FileHelper.
    extension = FileHelper.getFileExtenstion(params.documentPath).lower()
    if extension == "pdf":
        params.documentType = "PDF"
    elif extension in ("jpg", "jpeg", "png"):
        params.documentType = "IMAGE"
    else:
        raise Exception("Document should be jpg/jpeg, png or pdf.")

    if params.documentType == "PDF" and params.isLocalDocument:
        raise Exception("PDF must be in S3 bucket.")

    # Explicit "== False" is kept on purpose: it only fires when a flag
    # compares equal to False, which is not the same as "not flag" if
    # Input() were to default a flag to None.
    if (
        params.detectText == False  # noqa: E712
        and params.detectForms == False  # noqa: E712
        and params.detectTables == False  # noqa: E712
    ):
        raise Exception("Select at least one option to extract text, form or table")

    self.inputParameters = params