in src/textractor.py [0:0]
def validateInput(self, args):
    """Validate the parsed arguments and build the processing parameters.

    ``args`` is passed to ``self.getInputParameters``; the mapping it
    returns (``event``) is then inspected. ``event['documents']`` selects
    the input and may be:

    * ``s3://bucket/key``     - a single S3 object,
    * ``s3://bucket/prefix/`` - every supported file under an S3 prefix,
    * ``some/folder/``        - every supported file in a local folder,
    * anything else           - treated as a single local file path.

    The ``s3://`` prefix and the trailing ``/`` are matched
    case-insensitively on the whole value.

    Returns:
        dict with the keys ``bucketName`` (``None`` for local input),
        ``documents`` (list of keys/paths), ``awsRegion``, the boolean
        feature flags ``text``, ``forms``, ``tables``, ``insights`` and
        ``medical-insights`` (``True`` when the key is present in
        ``event``), and ``translate`` (``event['translate']`` or ``""``).

    Raises:
        ValueError: if ``event`` has no ``'documents'`` entry, or the
            entry is empty/``None``.
    """
    event = self.getInputParameters(args)

    # A missing key and an empty/None value are both "no document given";
    # reject them up front instead of failing later on ``.lower()`` or
    # returning ``[""]`` as the document list.
    inputDocument = event['documents'] if 'documents' in event else None
    if not inputDocument:
        raise ValueError(
            "Document or path to a folder or S3 bucket containing documents is required."
        )

    idl = inputDocument.lower()
    isFolder = idl.endswith("/")

    bucketName = None
    awsRegion = 'us-east-1'

    if idl.startswith("s3://"):
        # Split "s3://<bucket>/<key>" by hand rather than with urlparse():
        # urlparse() strips "?query" and "#fragment" from the path, which
        # truncates object keys that legitimately contain '?' or '#'.
        bucketName, _, path = inputDocument[len("s3://"):].partition("/")

        # Prefer the bucket's own region when the helper can resolve it.
        bucketRegion = S3Helper.getS3BucketRegion(bucketName)
        if bucketRegion:
            awsRegion = bucketRegion

        if isFolder:
            # NOTE(review): PDF is accepted only for S3 input, not for
            # local folders - presumably a service constraint; confirm.
            # NOTE(review): meaning of the literal 1 passed to
            # S3Helper.getFileNames is not visible here - confirm.
            allowedFileTypes = ["jpg", "jpeg", "png", "pdf"]
            documents = S3Helper.getFileNames(
                awsRegion, bucketName, path, 1, allowedFileTypes
            )
        else:
            documents = [path]
    elif isFolder:
        allowedFileTypes = ["jpg", "jpeg", "png"]
        documents = FileHelper.getFileNames(inputDocument, allowedFileTypes)
    else:
        documents = [inputDocument]

    # An explicit region always wins over the default / bucket region.
    # It is applied after any S3 listing above, which uses the bucket region.
    if 'region' in event:
        awsRegion = event['region']

    return {
        "bucketName": bucketName,
        "documents": documents,
        "awsRegion": awsRegion,
        "text": 'text' in event,
        "forms": 'forms' in event,
        "tables": 'tables' in event,
        "insights": 'insights' in event,
        "medical-insights": 'medical-insights' in event,
        "translate": event["translate"] if "translate" in event else "",
    }