# in src/createDocumentClassifier.py [0:0]
def lambda_handler(event, context):
    """Create an Amazon Comprehend document classifier from training data in S3.

    Configuration is read from environment variables:

    * ``classifierBucket`` -- S3 bucket used for both input and output.
    * ``classifierBucketPrefix`` -- key prefix inside that bucket.
    * ``ComprehendARN`` -- passed as ``DataAccessRoleArn`` to Comprehend.

    The training file is expected at
    ``s3://<bucket>/<prefix>/comprehend-train.csv`` and the job output is
    written under ``s3://<bucket>/<prefix>/output/train_job``.

    Args:
        event: Lambda invocation event (not used by this handler).
        context: Lambda runtime context (not used by this handler).

    Returns:
        dict: ``{'DocumentClassifierArn': <arn>}`` taken from the
        ``create_document_classifier`` response.

    Raises:
        KeyError: If any of the required environment variables is not set.
    """
    DSTTRAINFILE = 'comprehend-train.csv'

    # Read each setting once so input and output locations cannot drift apart.
    bucket = os.environ['classifierBucket']
    prefix = os.environ['classifierBucketPrefix']

    s3_train_data = 's3://{}/{}/{}'.format(bucket, prefix, DSTTRAINFILE)
    # The output location previously repeated the bucket name where the key
    # prefix belongs (s3://<bucket>/<bucket>/...); it now uses the same
    # prefix as the training data.
    s3_output_job = 's3://{}/{}/{}'.format(bucket, prefix, 'output/train_job')
    print('training data location: ',s3_train_data, "output location:", s3_output_job)

    # A random suffix keeps the classifier name unique across invocations.
    uid = str(uuid.uuid4())

    comprehend = boto3.client('comprehend')
    training_job = comprehend.create_document_classifier(
        DocumentClassifierName='aim317-custom-classifier-' + uid,
        DataAccessRoleArn=os.environ['ComprehendARN'],
        InputDataConfig={
            'S3Uri': s3_train_data
        },
        OutputDataConfig={
            'S3Uri': s3_output_job
        },
        LanguageCode='en',
        VersionName='v001'
    )

    return {
        'DocumentClassifierArn': training_job['DocumentClassifierArn']
    }