def convert_to_multilabel_dataset()

in comprehend_groundtruth_integration/src/comprehend_customer_scripts/GroundTruth/DocumentClassifier/groundtruth_to_comprehend_clr_format_converter.py [0:0]


    def convert_to_multilabel_dataset(self, index, jsonLine, label_delimiter):
        """Convert one augmented-manifest line into a ``(labels, source)`` pair.

        The line is parsed with ``_parse_manifest_input``, validated, and its
        class map is turned into a delimited label string by ``_get_labels``.

        Side effects: stores ``label_delimiter`` on ``self.label_delimiter``
        and the job name returned by ``get_labeling_job_name`` on
        ``self.labeling_job_name``.

        Args:
            index: Position of the line in the manifest; used in error
                messages and passed to the size checks.
            jsonLine: Raw manifest line to parse.
            label_delimiter: Separator used to split the label string when
                checking for empty labels.

        Returns:
            A tuple ``(labels, source)`` where ``labels`` is the value
            returned by ``_get_labels`` and ``source`` is the value stored
            under the ``SOURCE`` key of the parsed line.

        Raises:
            Exception: If the line cannot be parsed (including when the
                parser yields ``None``), if the ``SOURCE`` or ``CLASS_MAP``
                key is missing, if the class map is empty, or if any
                individual label is empty. Size checks delegated to
                ``_check_document_size`` / ``_check_label_size`` may raise
                as well.
        """
        self.label_delimiter = label_delimiter

        manifest_entry = self._parse_manifest_input(index, jsonLine)
        if manifest_entry is None:
            # Without this guard the method fell through to the final return
            # with ``labels`` and ``source`` unbound (UnboundLocalError).
            # Report it the same way as every other unparseable line.
            raise Exception(CANNOT_PARSE_AUGMENTED_MANIFEST.substitute(line=index,
                                                                       file_name=self.groundtruth_manifest_file_name))

        if SOURCE not in manifest_entry:
            raise Exception(CANNOT_PARSE_AUGMENTED_MANIFEST.substitute(line=index,
                                                                       file_name=self.groundtruth_manifest_file_name))
        source = manifest_entry[SOURCE]
        self._check_document_size(source, index, limits=default_limits)

        self.labeling_job_name = self.get_labeling_job_name(index, manifest_entry)
        job_output = manifest_entry[self.labeling_job_name]

        if CLASS_MAP not in job_output:
            raise Exception(CANNOT_PARSE_AUGMENTED_MANIFEST.substitute(line=index,
                                                                       file_name=self.groundtruth_manifest_file_name))
        class_map = job_output[CLASS_MAP]

        # A document without any label is not supported.
        if not class_map:
            raise Exception(EMPTY_LABEL_UNSUPPORTED.substitute(filename=self.groundtruth_manifest_file_name))

        # Enforce the per-label size limit configured in default_limits.
        for label in class_map.values():
            self._check_label_size(label, index, limits=default_limits)

        labels = self._get_labels(class_map)

        # Reject the line if splitting on the delimiter yields an empty label.
        if any(not label for label in labels.split(self.label_delimiter)):
            raise Exception(EMPTY_LABEL_FOUND.substitute(line=index,
                                                         file=self.groundtruth_manifest_file_name))

        return labels, source