in packages/ekyc-api/src/ekyc-api/Controllers/TrainingController.cs [145:311]
// Starts a SageMaker Ground Truth bounding-box labelling job for an existing training job.
//
// Steps performed:
//   1. Loads the TrainingJob record identified by JobId from the training table.
//   2. Lists every object under "images/{JobId}/" in the training bucket and writes one
//      {"source-ref": "s3://..."} JSON line per object to a local manifest file.
//   3. Uploads the manifest to "manifests/{JobId}.json".
//   4. Writes a label category file with one label per DocumentTypes enum name and
//      uploads it to "category/{JobId}.json".
//   5. Creates the labelling job (named after JobId), stores the returned ARN on the
//      TrainingJob record and saves the record.
//
// Parameters:
//   JobId - identifier of the training job; also used as the labelling job name.
//
// Returns: the ARN of the labelling job that was created.
//
// Throws:
//   HttpStatusException - when JobId is null/empty or no matching job record exists.
//   Any exception raised while creating the labelling job or saving the record is
//   logged and rethrown unchanged.
public async Task<string> StartLabellingJob(string JobId)
{
    // NOTE(review): both validation failures are reported as 500 although they look like
    // client errors (400/404). Left unchanged because API clients may depend on it.
    if (string.IsNullOrEmpty(JobId))
        throw new HttpStatusException(HttpStatusCode.InternalServerError, "Job ID must be provided.");

    var config = new DynamoDBOperationConfig
    {
        OverrideTableName = Globals.TrainingTableName
    };
    var job = await _dbContext.LoadAsync<TrainingJob>(JobId, config);
    if (job == null)
        throw new HttpStatusException(HttpStatusCode.InternalServerError, "Job not found.");

    var strBucketName = Globals.TrainingBucket;
    var strLocalManifestPath = "/tmp/" + JobId + ".json";
    var strLocalLabelCatPath = "/tmp/" + JobId + "-cat.json";
    var strManifestKey = "manifests/" + JobId + ".json";
    var strCategoryFileKey = "category/" + JobId + ".json";

    try
    {
        // List all the files in the S3 path. 1000 is the largest page size S3 returns,
        // so this keeps the number of List calls to a minimum.
        var request = new ListObjectsV2Request
        {
            BucketName = strBucketName,
            MaxKeys = 1000,
            Prefix = $"images/{JobId}/"
        };

        // StreamWriter(path, append: false) creates the file or truncates an existing one.
        using (var sw = new StreamWriter(strLocalManifestPath, false))
        {
            ListObjectsV2Response response;
            do
            {
                response = await _amazonS3.ListObjectsV2Async(request);
                foreach (S3Object entry in response.S3Objects)
                {
                    Console.WriteLine("key = {0} size = {1}",
                        entry.Key, entry.Size);

                    // Zero-byte keys ending in "/" are folder placeholders, not images.
                    if (entry.Size == 0 && entry.Key.EndsWith("/", StringComparison.Ordinal))
                        continue;

                    string fullS3Path = $"s3://{strBucketName}/{entry.Key}";
                    // Serialize the value so quotes, backslashes and control characters
                    // in an object key cannot break the JSON Lines manifest.
                    sw.WriteLine("{\"source-ref\":" +
                                 System.Text.Json.JsonSerializer.Serialize(fullS3Path) + "}");
                }

                Console.WriteLine("Next Continuation Token: {0}", response.NextContinuationToken);
                request.ContinuationToken = response.NextContinuationToken;
            } while (response.IsTruncated);
        }

        // Upload the manifest to S3
        await _amazonS3.PutObjectAsync(new Amazon.S3.Model.PutObjectRequest
            { BucketName = strBucketName, FilePath = strLocalManifestPath, Key = strManifestKey });

        // Next, create the label category JSON
        var documentTypeNames = Enum.GetNames(typeof(DocumentTypes));

        // Truncate on open: File.OpenWrite would keep the tail of a longer pre-existing
        // file and leave invalid JSON behind.
        using (var sw = new StreamWriter(strLocalLabelCatPath, false))
        {
            // The verbatim literals below are flush-left on purpose: any leading
            // whitespace on their continuation lines would become part of the output.
            sw.Write(@" {
""document-version"": ""2018-11-28"",
""labels"": [
");
            bool isFirst = true;
            foreach (var strDocumentType in documentTypeNames)
            {
                if (isFirst)
                    isFirst = false;
                else
                    sw.WriteLine(",");
                sw.Write(@" {
""label"": """ + strDocumentType + @"""
}");
            }

            sw.WriteLine("]}");
        }

        // Upload the category JSON
        await _amazonS3.PutObjectAsync(new Amazon.S3.Model.PutObjectRequest
            { BucketName = strBucketName, FilePath = strLocalLabelCatPath, Key = strCategoryFileKey });

        // Create the SageMaker Ground Truth labelling job
        Amazon.SageMaker.Model.CreateLabelingJobResponse labellingResponse = null;
        try
        {
            labellingResponse = await _sageMaker.CreateLabelingJobAsync(
                new Amazon.SageMaker.Model.CreateLabelingJobRequest
                {
                    HumanTaskConfig = new Amazon.SageMaker.Model.HumanTaskConfig
                    {
                        // NOTE(review): both Lambda ARNs are pinned to ap-southeast-1;
                        // presumably they must match the deployment region - confirm.
                        AnnotationConsolidationConfig = new Amazon.SageMaker.Model.AnnotationConsolidationConfig
                        {
                            AnnotationConsolidationLambdaArn =
                                "arn:aws:lambda:ap-southeast-1:377565633583:function:ACS-BoundingBox"
                        },
                        NumberOfHumanWorkersPerDataObject = 1,
                        PreHumanTaskLambdaArn =
                            "arn:aws:lambda:ap-southeast-1:377565633583:function:PRE-BoundingBox",
                        TaskDescription = $"Labelling for Document Bounding Boxes job Id {JobId}",
                        TaskTimeLimitInSeconds = 28800, // 8 hours
                        TaskTitle = $"Labelling for Document Bounding Boxes job Id {JobId}",
                        UiConfig = new Amazon.SageMaker.Model.UiConfig
                        {
                            UiTemplateS3Uri = Globals.GroundTruthUiTemplateS3Uri
                        },
                        WorkteamArn = Globals.GroundTruthWorkteamArn
                    },
                    InputConfig = new Amazon.SageMaker.Model.LabelingJobInputConfig
                    {
                        DataSource = new Amazon.SageMaker.Model.LabelingJobDataSource
                        {
                            S3DataSource = new Amazon.SageMaker.Model.LabelingJobS3DataSource
                                { ManifestS3Uri = $"s3://{strBucketName}/{strManifestKey}" }
                        }
                    },
                    LabelAttributeName = "doctype",
                    LabelCategoryConfigS3Uri = $"s3://{strBucketName}/{strCategoryFileKey}",
                    LabelingJobName = JobId,
                    OutputConfig = new Amazon.SageMaker.Model.LabelingJobOutputConfig
                    {
                        S3OutputPath = $"s3://{strBucketName}/output"
                    },
                    RoleArn = Globals.GroundTruthRoleArn
                });

            // Persist the ARN on the job record so the labelling job can be found later.
            var arn = labellingResponse.LabelingJobArn;
            job.LabellingJobArn = arn;
            await _dbContext.SaveAsync(job, config);
            return arn;
        }
        catch (Exception ex)
        {
            _logger.Log(LogLevel.Error,
                $"An error occurred trying to create the SageMaker labelling job: {ex.Message}");
            throw;
        }
    }
    finally
    {
        // Both files have been uploaded (or the attempt failed); do not let them
        // accumulate in local temporary storage.
        DeleteLabellingTempFile(strLocalManifestPath);
        DeleteLabellingTempFile(strLocalLabelCatPath);
    }
}

// Best-effort removal of a local temp file created by StartLabellingJob. Failures are
// reported on the console but never thrown, so cleanup cannot mask the real outcome.
private static void DeleteLabellingTempFile(string path)
{
    try
    {
        // File.Delete is a no-op when the file does not exist.
        System.IO.File.Delete(path);
    }
    catch (IOException ex)
    {
        Console.WriteLine("Could not delete temp file {0}: {1}", path, ex.Message);
    }
    catch (UnauthorizedAccessException ex)
    {
        Console.WriteLine("Could not delete temp file {0}: {1}", path, ex.Message);
    }
}
}
}