in src/handler.py [0:0]
def pii_access_control_handler(event, context):
    """Lambda handler that serves an S3 Object Lambda GET only if no PII is found.

    The handler validates the incoming event, reads the optional JSON payload
    from the S3 Object Lambda configuration, downloads the requested object
    through its presigned URL and runs ``classify`` on the text. If any PII
    entities come back, ``RestrictedDocumentException`` is raised; otherwise the
    text is sent back through ``S3Client.respond_back_with_data``.

    Args:
        event: Invocation event. Read for REQUEST_ID, S3OL_CONFIGURATION
            (PAYLOAD, S3OL_ACCESS_POINT_ARN), GET_OBJECT_CONTEXT
            (INPUT_S3_URL, REQUEST_ROUTE, REQUEST_TOKEN) and
            USER_REQUEST (HEADERS).
        context: Invocation context; only ``get_remaining_time_in_millis()``
            is used.

    Returns:
        None. The outcome is delivered through the S3 client / exception
        handler rather than through a return value.

    Note:
        Anything raised by the event validation or client construction that
        runs before the ``try`` block propagates to the caller. Anything raised
        while running the time-bound task is passed to
        ``ExceptionHandler.handle_exception``.
    """
    LOG.info(f'Received event with requestId: {event[REQUEST_ID]}')
    LOG.debug(f'Raw event {event}')
    InputEventValidator.validate(event)

    # An empty/falsy payload means "no overrides": fall back to defaults.
    s3ol_configuration = event[S3OL_CONFIGURATION]
    raw_payload = s3ol_configuration[PAYLOAD]
    if raw_payload:
        invoke_args = json.loads(raw_payload)
    else:
        invoke_args = {}

    language_code = invoke_args.get(LANGUAGE_CODE, DEFAULT_LANGUAGE_CODE)
    detection_config = ClassificationConfig(**invoke_args)
    object_get_context = event[GET_OBJECT_CONTEXT]
    s3ol_access_point = s3ol_configuration[S3OL_ACCESS_POINT_ARN]

    s3 = S3Client(s3ol_access_point)
    cloud_watch = CloudWatchClient()
    comprehend = ComprehendClient(
        session_id=event[REQUEST_ID],
        user_agent=DEFAULT_USER_AGENT,
        endpoint_url=COMPREHEND_ENDPOINT_URL,
        s3ol_access_point=s3ol_access_point,
    )
    exception_handler = ExceptionHandler(s3)

    LOG.debug("Pii Entity Types to be detected:" + str(detection_config.pii_entity_types))
    pii_classification_segmenter = Segmenter(DOCUMENT_MAX_SIZE_CONTAINS_PII_ENTITIES)

    # Progress markers written by the nested task and read when publishing
    # metrics in the ``finally`` clause below.
    processed_document = False
    processed_pii_document = False
    pii_entities = []

    def time_bound_task():
        """Download, classify and (if clean) return the requested object."""
        nonlocal processed_document, processed_pii_document, pii_entities

        PartialObjectRequestValidator.validate(event)

        time1 = time.time()
        text, http_headers, status_code = s3.download_file_from_presigned_url(
            object_get_context[INPUT_S3_URL],
            event[USER_REQUEST][HEADERS],
        )
        time2 = time.time()
        LOG.info(f"Downloaded the file in : {(time2 - time1)} seconds")

        pii_entities = classify(text, pii_classification_segmenter, comprehend, detection_config, language_code)
        time1 = time.time()
        processed_document = True
        LOG.info(f"Pii detection completed within {(time1 - time2)} seconds. Returning back the response to S3")

        # Any detected entity blocks the response; the exception is routed to
        # the exception handler by the enclosing try/except.
        if len(pii_entities) > 0:
            processed_pii_document = True
            raise RestrictedDocumentException()

        text_bytes = text.encode('utf-8')
        # Content length is the size of the encoded bytes being sent back.
        http_headers[CONTENT_LENGTH] = len(text_bytes)
        s3.respond_back_with_data(
            text_bytes,
            http_headers,
            object_get_context[REQUEST_ROUTE],
            object_get_context[REQUEST_TOKEN],
            status_code,
        )

    try:
        # Hold back RESERVED_TIME_FOR_CLEANUP from the remaining invocation
        # time so the except/finally work below still has room to run.
        time_budget = context.get_remaining_time_in_millis() - RESERVED_TIME_FOR_CLEANUP
        execute_task_with_timeout(time_budget, time_bound_task)
    except Exception as generated_exception:
        exception_handler.handle_exception(
            generated_exception,
            object_get_context[REQUEST_ROUTE],
            object_get_context[REQUEST_TOKEN],
        )
    finally:
        if PUBLISH_CLOUD_WATCH_METRICS:
            publish_metrics(
                cloud_watch,
                s3,
                comprehend,
                processed_document,
                processed_pii_document,
                language_code,
                s3ol_access_point,
                pii_entities,
            )

    LOG.info("Responded back to s3 successfully")