in comprehend_groundtruth_integration/src/comprehend_customer_scripts/GroundTruth/DocumentClassifier/groundtruth_to_comprehend_clr_format_converter.py [0:0]
def convert_to_multiclass_dataset(self, index, jsonLine):
    """Extract the ``(class_name, source)`` pair from one manifest line.

    The line is parsed with ``self._parse_manifest_input``. The document
    text is read from the ``SOURCE`` key and the label from the
    ``CLASS_NAME`` key of the attribute named by
    ``self.get_labeling_job_name``. Both values are passed to the size
    checks (``self._check_document_size`` / ``self._check_label_size``)
    together with ``default_limits`` before being returned.

    Side effect: ``self.labeling_job_name`` is overwritten with the name
    resolved for this line.

    Args:
        index: Line identifier forwarded to the helpers and rendered as
            ``line`` in the parse-error message.
        jsonLine: Raw manifest line handed to ``_parse_manifest_input``.

    Returns:
        A ``(class_name, source)`` tuple, or ``None`` when
        ``_parse_manifest_input`` returns ``None``.

    Raises:
        Exception: If the ``SOURCE`` key or the ``CLASS_NAME`` key is
            missing, or if the label is empty/falsy. The helper calls may
            raise as well; their behaviour is not visible from here.
    """
    record = self._parse_manifest_input(index, jsonLine)
    if record is None:
        # Nothing was parsed for this line; the caller receives None.
        return None

    def unparseable_line_error():
        # Built lazily so the template is rendered only on the failure path.
        return Exception(CANNOT_PARSE_AUGMENTED_MANIFEST.substitute(
            line=index, file_name=self.groundtruth_manifest_file_name))

    # Membership is tested on `.keys()` so that a non-mapping value fails
    # the same way it always has.
    if SOURCE not in record.keys():
        raise unparseable_line_error()
    document = record[SOURCE]
    self._check_document_size(document, index, limits=default_limits)

    self.labeling_job_name = self.get_labeling_job_name(index, record)
    annotation = record[self.labeling_job_name]
    if CLASS_NAME not in annotation.keys():
        raise unparseable_line_error()

    label = annotation[CLASS_NAME]
    if not label:
        raise Exception(EMPTY_LABEL_UNSUPPORTED.substitute(filename=self.groundtruth_manifest_file_name))
    self._check_label_size(label, index, limits=default_limits)

    return label, document