public async Task StartLabellingJob()

in packages/ekyc-api/src/ekyc-api/Controllers/TrainingController.cs [145:311]


        /// <summary>
        /// Prepares the inputs for, and then creates, a SageMaker Ground Truth labelling job
        /// for an existing training job.
        /// </summary>
        /// <remarks>
        /// Steps performed, in order:
        /// <list type="number">
        /// <item><description>Load the training job record from the training table.</description></item>
        /// <item><description>Write a manifest with one <c>source-ref</c> line per S3 object under
        /// <c>images/{JobId}/</c> and upload it to <c>manifests/{JobId}.json</c>.</description></item>
        /// <item><description>Write a label category file from the <c>DocumentTypes</c> enum names and
        /// upload it to <c>category/{JobId}.json</c>.</description></item>
        /// <item><description>Create the labelling job and save its ARN on the training job record.</description></item>
        /// </list>
        /// The local temporary files under <c>/tmp</c> are removed once the uploads have been attempted.
        /// </remarks>
        /// <param name="JobId">Identifier of the training job; also used as the labelling job name.</param>
        /// <returns>The ARN of the created labelling job.</returns>
        /// <exception cref="HttpStatusException">
        /// Thrown when <paramref name="JobId"/> is null or empty, or when no job with that id is found.
        /// </exception>
        public async Task<string> StartLabellingJob(string JobId)
        {
            // NOTE(review): both guards report 500 although they look like client errors (400 / 404).
            // Left unchanged because existing API consumers may depend on the current status code.
            if (string.IsNullOrEmpty(JobId))
                throw new HttpStatusException(HttpStatusCode.InternalServerError, "Job ID must be provided.");

            var config = new DynamoDBOperationConfig
            {
                OverrideTableName = Globals.TrainingTableName
            };

            var job = await _dbContext.LoadAsync<TrainingJob>(JobId, config);

            if (job == null)
                throw new HttpStatusException(HttpStatusCode.InternalServerError, "Job not found.");

            var strBucketName = Globals.TrainingBucket;

            var strLocalManifestPath = "/tmp/" + JobId + ".json";
            var strLocalLabelCatPath = "/tmp/" + JobId + "-cat.json";

            var strManifestKey = "manifests/" + JobId + ".json";
            var strCategoryFileKey = "category/" + JobId + ".json";

            try
            {
                // Build the input manifest from the images uploaded for this job, then upload it.
                await WriteLabellingManifestAsync(strBucketName, $"images/{JobId}/", strLocalManifestPath);

                await _amazonS3.PutObjectAsync(new Amazon.S3.Model.PutObjectRequest
                    { BucketName = strBucketName, FilePath = strLocalManifestPath, Key = strManifestKey });

                // Build the label category JSON, then upload it.
                WriteLabelCategoryConfig(strLocalLabelCatPath);

                await _amazonS3.PutObjectAsync(new Amazon.S3.Model.PutObjectRequest
                    { BucketName = strBucketName, FilePath = strLocalLabelCatPath, Key = strCategoryFileKey });
            }
            finally
            {
                // The local copies are only staging files for the uploads; do not let them pile up.
                DeleteTempFileQuietly(strLocalManifestPath);
                DeleteTempFileQuietly(strLocalLabelCatPath);
            }

            // Create the SageMaker Ground Truth labelling job
            try
            {
                var labellingResponse = await _sageMaker.CreateLabelingJobAsync(
                    new Amazon.SageMaker.Model.CreateLabelingJobRequest
                    {
                        HumanTaskConfig = new Amazon.SageMaker.Model.HumanTaskConfig
                        {
                            // NOTE(review): both Lambda ARNs below are pinned to ap-southeast-1;
                            // presumably they must match the region the job runs in - confirm
                            // before deploying to another region.
                            AnnotationConsolidationConfig = new Amazon.SageMaker.Model.AnnotationConsolidationConfig
                            {
                                AnnotationConsolidationLambdaArn =
                                    "arn:aws:lambda:ap-southeast-1:377565633583:function:ACS-BoundingBox"
                            },
                            NumberOfHumanWorkersPerDataObject = 1,
                            PreHumanTaskLambdaArn =
                                "arn:aws:lambda:ap-southeast-1:377565633583:function:PRE-BoundingBox",
                            TaskDescription = $"Labelling for Document Bounding Boxes job Id {JobId}",
                            TaskTimeLimitInSeconds = 28800, // 8 hours
                            TaskTitle = $"Labelling for Document Bounding Boxes job Id {JobId}",
                            UiConfig = new Amazon.SageMaker.Model.UiConfig
                            {
                                UiTemplateS3Uri = Globals.GroundTruthUiTemplateS3Uri
                            },
                            WorkteamArn = Globals.GroundTruthWorkteamArn
                        },
                        InputConfig = new Amazon.SageMaker.Model.LabelingJobInputConfig
                        {
                            DataSource = new Amazon.SageMaker.Model.LabelingJobDataSource
                            {
                                S3DataSource = new Amazon.SageMaker.Model.LabelingJobS3DataSource
                                    { ManifestS3Uri = $"s3://{strBucketName}/{strManifestKey}" }
                            }
                        },
                        LabelAttributeName = "doctype",
                        LabelCategoryConfigS3Uri = $"s3://{strBucketName}/{strCategoryFileKey}",
                        LabelingJobName = JobId,
                        OutputConfig = new Amazon.SageMaker.Model.LabelingJobOutputConfig
                        {
                            S3OutputPath = $"s3://{strBucketName}/output"
                        },
                        RoleArn = Globals.GroundTruthRoleArn
                    });

                var arn = labellingResponse.LabelingJobArn;

                // Remember which labelling job belongs to this training job.
                job.LabellingJobArn = arn;

                await _dbContext.SaveAsync(job, config);

                return arn;
            }
            catch (Exception ex)
            {
                _logger.Log(LogLevel.Error,
                    $"An error occurred trying to create the SageMaker labelling job: {ex.Message}");
                throw;
            }
        }

        /// <summary>
        /// Lists every object under <paramref name="keyPrefix"/> and writes one JSON line
        /// (<c>{"source-ref":"s3://bucket/key"}</c>) per object to <paramref name="localPath"/>.
        /// </summary>
        /// <param name="bucketName">Bucket to list.</param>
        /// <param name="keyPrefix">Key prefix that selects the objects to include.</param>
        /// <param name="localPath">Local file to create or overwrite with the manifest.</param>
        private async Task WriteLabellingManifestAsync(string bucketName, string keyPrefix, string localPath)
        {
            // No MaxKeys override: the service default page size avoids a round trip per handful of keys.
            var request = new ListObjectsV2Request
            {
                BucketName = bucketName,
                Prefix = keyPrefix
            };

            var entryCount = 0;

            // append: false creates the file or truncates an existing one.
            using (var sw = new StreamWriter(localPath, false))
            {
                ListObjectsV2Response response;

                do
                {
                    response = await _amazonS3.ListObjectsV2Async(request);

                    foreach (S3Object entry in response.S3Objects)
                    {
                        // Keys ending in '/' are folder placeholders, not data objects to label.
                        if (entry.Key.EndsWith("/", StringComparison.Ordinal))
                            continue;

                        var fullS3Path = $"s3://{bucketName}/{entry.Key}";

                        // Serialize the value so quotes or backslashes in a key cannot break the JSON line.
                        await sw.WriteLineAsync(
                            "{\"source-ref\":" + System.Text.Json.JsonSerializer.Serialize(fullS3Path) + "}");

                        entryCount++;
                    }

                    request.ContinuationToken = response.NextContinuationToken;
                } while (response.IsTruncated);
            }

            _logger.Log(LogLevel.Debug,
                $"Wrote {entryCount} entries for prefix {keyPrefix} to manifest {localPath}");
        }

        /// <summary>
        /// Writes the label category JSON, with one label per <c>DocumentTypes</c> enum name,
        /// to <paramref name="localPath"/>, replacing any existing file.
        /// </summary>
        /// <param name="localPath">Local file to create or overwrite.</param>
        private static void WriteLabelCategoryConfig(string localPath)
        {
            // Enum member names are identifiers, so they need no JSON escaping.
            var labelEntries = Array.ConvertAll(
                Enum.GetNames(typeof(DocumentTypes)),
                name => "{\"label\":\"" + name + "\"}");

            var json = "{\"document-version\":\"2018-11-28\",\"labels\":["
                       + string.Join(",", labelEntries)
                       + "]}";

            // WriteAllText overwrites the whole file, so no stale bytes from an earlier, longer file survive.
            System.IO.File.WriteAllText(localPath, json);
        }

        /// <summary>
        /// Deletes a temporary file; a failure to delete is logged as a warning rather than thrown.
        /// </summary>
        /// <param name="path">File to delete. A file that does not exist is not an error.</param>
        private void DeleteTempFileQuietly(string path)
        {
            try
            {
                System.IO.File.Delete(path);
            }
            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
            {
                _logger.Log(LogLevel.Warning, $"Could not delete temporary file {path}: {ex.Message}");
            }
        }
    }
}