in comprehend_groundtruth_integration/src/comprehend_customer_scripts/GroundTruth/DocumentClassifier/groundtruth_to_comprehend_clr_format_converter.py [0:0]
def _check_document_size(self, source, index, limits):
    """Reject a document whose UTF-8 encoded size exceeds the configured limit.

    The size is the length of ``source`` encoded as UTF-8, divided by the
    module-level ``BYTES_TO_MIB`` constant, and is compared against
    ``limits['MAX_DOCUMENT_SIZE_MB']``.

    Args:
        source: Document text; must support ``.encode('utf-8')``.
        index: Value reported as ``line`` in the error message
            (presumably the manifest line number -- confirm with the caller).
        limits: Mapping that provides the ``'MAX_DOCUMENT_SIZE_MB'`` entry.

    Raises:
        Exception: If the computed size is strictly greater than the limit.
            The message is produced by ``DOCUMENT_TOO_BIG.substitute`` with
            the limit, ``index`` and ``self.groundtruth_manifest_file_name``.
    """
    # Measure encoded bytes, not characters: multi-byte code points count
    # for more than one unit towards the limit.
    encoded_length = len(source.encode('utf-8'))
    size_in_mb = encoded_length / BYTES_TO_MIB
    max_size_mb = limits['MAX_DOCUMENT_SIZE_MB']

    # A document exactly at the limit is accepted; only strictly larger fails.
    if not size_in_mb > max_size_mb:
        return

    message = DOCUMENT_TOO_BIG.substitute(
        size=max_size_mb,
        line=index,
        file=self.groundtruth_manifest_file_name,
    )
    raise Exception(message)