in workflow3_local/local_endpointbuilder.py [0:0]
def get_endpoint_arn(csv_file_name, bucket_name, role_arn, model_name):
    """Train (or reuse) a Comprehend document classifier and return its endpoint ARN.

    The function creates a custom document classifier named ``model_name``
    from the CSV at ``s3://{bucket_name}/{csv_file_name}``, blocks until
    training finishes, then creates an endpoint (also named ``model_name``)
    and blocks until it leaves the ``CREATING`` state.  If the classifier or
    the endpoint already exists (``ResourceInUseException``), its ARN is
    rebuilt from the region, the account id found in ``role_arn`` and
    ``model_name`` instead of being created again.

    Args:
        csv_file_name: Object key of the training CSV inside ``bucket_name``.
        bucket_name: Name of the S3 bucket holding the training CSV.
        role_arn: ARN of the IAM role passed to Comprehend as
            ``DataAccessRoleArn``; also the source of the partition and
            account id used when an ARN has to be rebuilt.
        model_name: Name used for both the classifier and the endpoint.

    Returns:
        The endpoint ARN as a string.

    Raises:
        ValueError: If ``role_arn`` is not shaped like an IAM ARN, or if the
            classifier or the endpoint ends up in a failed state.
        botocore.exceptions.ClientError: For any AWS error other than
            ``ResourceInUseException`` on the two create calls.
    """
    comprehend = boto3.client("comprehend")
    region = boto3.session.Session().region_name
    partition, account_number = _parse_role_arn(role_arn)
    arn_prefix = f"arn:{partition}:comprehend:{region}:{account_number}"

    try:
        # create model with the CSV training dataset's S3 URI and the ARN of the IAM role
        create_response = comprehend.create_document_classifier(
            InputDataConfig={"S3Uri": f"s3://{bucket_name}/{csv_file_name}"},
            DataAccessRoleArn=role_arn,
            DocumentClassifierName=model_name,
            LanguageCode="en",
        )
    except botocore.exceptions.ClientError as error:
        # Only "already exists" is recoverable; everything else propagates
        # with its original traceback.
        if error.response["Error"]["Code"] != "ResourceInUseException":
            raise
        model_arn = f"{arn_prefix}:document-classifier/{model_name}"
        print("Model has already been created")
    else:
        model_arn = create_response["DocumentClassifierArn"]
        print("Created Comprehend model")

    print("Model training...")
    _wait_for_classifier(comprehend, model_arn)
    print("Model trained. Creating endpoint")

    try:
        endpoint_response = comprehend.create_endpoint(
            EndpointName=model_name,
            ModelArn=model_arn,
            DesiredInferenceUnits=10,
        )
    except botocore.exceptions.ClientError as error:
        if error.response["Error"]["Code"] != "ResourceInUseException":
            raise
        # A pre-existing endpoint is returned as-is, without waiting on it.
        endpoint_arn = f"{arn_prefix}:document-classifier-endpoint/{model_name}"
        print("Endpoint has already been created")
    else:
        endpoint_arn = endpoint_response["EndpointArn"]
        _wait_for_endpoint(comprehend, endpoint_arn)

    # the model's endpoint ARN is returned as a string
    return endpoint_arn


def _parse_role_arn(role_arn):
    """Return ``(partition, account_id)`` taken from an IAM role ARN.

    The ARN is split on ``":"`` rather than stripped with ``str.lstrip``:
    ``lstrip`` removes a set of characters, not a prefix, and therefore
    yields a wrong account id for partitions such as ``aws-cn``.
    """
    # Expected layout: arn:<partition>:iam::<account-id>:role/<name>
    fields = role_arn.split(":")
    if len(fields) < 6 or fields[0] != "arn" or not fields[4]:
        raise ValueError(f"Not a valid IAM role ARN: {role_arn!r}")
    return fields[1], fields[4]


def _wait_for_classifier(comprehend, model_arn, poll_seconds=300):
    """Block until the classifier is trained; raise ValueError if it cannot be."""
    # NOTE(review): status names follow the Comprehend API reference for
    # DescribeDocumentClassifier -- confirm "TRAINED_WITH_WARNING" should be
    # accepted as usable for this workflow.
    ready_states = {"TRAINED", "TRAINED_WITH_WARNING"}
    # States from which the classifier can never become trained; polling on
    # them would loop forever.
    failed_states = {"IN_ERROR", "STOPPED", "DELETING"}

    while True:
        properties = comprehend.describe_document_classifier(
            DocumentClassifierArn=model_arn
        )["DocumentClassifierProperties"]
        status = properties["Status"]
        if status in ready_states:
            return
        if status in failed_states:
            message = properties.get("Message", f"status is {status}")
            raise ValueError(f"The classifier is in error: {message}")
        # update the model's status every 5 minutes if it has not finished training
        sleep(poll_seconds)


def _wait_for_endpoint(comprehend, endpoint_arn, poll_seconds=180):
    """Block while the endpoint is being created; raise ValueError if creation failed."""
    while True:
        properties = comprehend.describe_endpoint(
            EndpointArn=endpoint_arn
        )["EndpointProperties"]
        status = properties["Status"]
        if status != "CREATING":
            break
        # update the endpoint's status every 3 minutes if it has not been created
        sleep(poll_seconds)

    # The failure check has to happen after the loop: while the status is
    # "CREATING" it can never equal a failure state.  "IN_ERROR" is kept
    # alongside "FAILED" because the original code tested for it.
    if status in ("FAILED", "IN_ERROR"):
        message = properties.get("Message", f"status is {status}")
        raise ValueError(f"The endpoint is in error: {message}")