in main/src/training-build/training-build.py [0:0]
def _loadFilterIds(trainInfo):
    """Return the set of image IDs listed in the training's optional filter file.

    The filter file location is read from the 'filterBucket' and 'filterKey'
    entries of ``trainInfo``. When either entry is missing, None, or empty,
    no S3 read is attempted and an empty set is returned.

    The file is decoded as UTF-8 and treated as one image ID per line.
    Surrounding whitespace is stripped and blank lines are ignored so that a
    trailing newline or stray spaces do not create bogus filter entries.
    """
    # NOTE(review): assumes trainInfo is a plain dict (supports .get) — confirm.
    filterBucket = trainInfo.get('filterBucket')
    filterKey = trainInfo.get('filterKey')
    # Truthiness covers the missing, None, and empty-string cases alike;
    # calling len() on a None attribute would raise TypeError.
    if not filterBucket or not filterKey:
        return set()
    fileObject = s3c.get_object(Bucket=filterBucket, Key=filterKey)
    text = fileObject['Body'].read().decode('utf-8')
    strippedLines = (line.strip() for line in text.splitlines())
    return {entry for entry in strippedLines if entry}


def _hasMoaLabel(image):
    """Return True when ``image`` has trainCategory 'moa' and a non-empty trainLabel."""
    # NOTE(review): assumes image is a plain dict (supports .get) — confirm.
    return image.get('trainCategory') == 'moa' and bool(image.get('trainLabel'))


def handler(event, context):
    """Build the list of training image prefixes for a training run and register it.

    Steps, in order:
      1. Look up the training (by ``event['trainId']``) and its embedding info.
      2. Load the optional set of filtered-out image IDs from S3.
      3. Walk every image of every plate compatible with the embedding's
         input dimensions; images that are not filtered and carry a 'moa'
         label contribute one train prefix key each.
      4. Write the newline-terminated prefix list to S3 under BUCKET and
         record it as an artifact of the training via artifactClient.

    Args:
        event: mapping that must contain 'trainId'.
        context: unused.

    Returns:
        dict with "statusCode": 200 and "body": "success".

    Raises:
        KeyError: if 'trainId', 'embeddingName', the embedding input
            dimensions, 'plateId', 'Item' or 'imageId' are missing from the
            respective records. Errors from the underlying clients propagate.
    """
    trainId = event['trainId']
    trainInfo = trainConfigurationClient.getTraining(trainId)
    embeddingName = trainInfo['embeddingName']
    embeddingInfo = trainConfigurationClient.getEmbeddingInfo(embeddingName)

    filterIds = _loadFilterIds(trainInfo)
    print("Filter contains {} entries".format(len(filterIds)))

    plateList = imageClient.listCompatiblePlates(
        embeddingInfo['inputWidth'],
        embeddingInfo['inputHeight'],
        embeddingInfo['inputDepth'],
        embeddingInfo['inputChannels'],
    )
    plateCount = len(plateList)

    # filteredCount counts images skipped because of the filter; it is kept
    # separate from the number of entries in the filter itself.
    filteredCount = 0
    unlabeledCount = 0
    labelCount = 0
    trainPrefixList = []
    # start=1 so the progress line reads "1 of N" .. "N of N".
    for plateNumber, plateInfo in enumerate(plateList, start=1):
        plateId = plateInfo['plateId']
        print("Processing plate {} {} of {}".format(plateId, plateNumber, plateCount))
        imageList = imageClient.getImagesByPlateId(plateId)
        print("Image list has {} entries".format(len(imageList)))
        for imageItem in imageList:
            image = imageItem['Item']
            imageId = image['imageId']
            if imageId in filterIds:
                filteredCount += 1
            elif _hasMoaLabel(image):
                trainPrefixList.append(bp.getTrainPrefixKey(embeddingName, plateId, imageId))
                labelCount += 1
            else:
                unlabeledCount += 1

    print("Train prefix list has {} entries".format(len(trainPrefixList)))
    print("labelCount={} filterCount={} unlabeldCount={}".format(labelCount, filteredCount, unlabeledCount))

    trainPrefixArtifactPath = bp.getTrainImageListArtifactPath(trainId)
    # One prefix per line, newline-terminated. With no prefixes this is a
    # single "\n", kept as-is because the consumer's expectations are not
    # visible here — TODO confirm downstream handling of an empty list.
    trainPrefixStringList = "\n".join(trainPrefixList) + "\n"
    trainPrefixStringListBytes = bytes(trainPrefixStringList, encoding='utf-8')
    s3c.put_object(Body=trainPrefixStringListBytes, Bucket=BUCKET, Key=trainPrefixArtifactPath)

    artifact = {
        "contextId": trainId,
        "trainId": trainId,
        "artifact": "s3key#" + trainPrefixArtifactPath,
    }
    artifactClient.createArtifact(artifact)

    return {
        "statusCode": 200,
        "body": "success",
    }