def prepare_labels()

in data_measurements/labels/labels.py [0:0]
19 lines of code
5 McCabe index (conditional complexity)

    def prepare_labels(self, label_field, label_names=None):
        """Use the evaluate library to return the label distribution.

        Args:
            label_field: Name of the column in ``self.dset`` that holds the
                labels.
            label_names: Optional label names. When omitted or empty, they
                are obtained from ``extract_label_names`` using
                ``self.ds_name`` and ``self.config_name``.

        Returns:
            The result of ``make_label_results_dict`` built from the label
            measurement (with an added ``"sums"`` entry) and the label names,
            or an empty dict when ``label_field`` is not one of the dataset's
            features.
        """
        logs.info("Inside main label calculation function.")
        logs.debug("Looking for label field called '%s'" % label_field)
        # A missing label column is not an error: log it and return an empty
        # result.
        if label_field not in self.dset.features:
            logs.warning("No label column found -- nothing to do. Returning.")
            logs.debug(self.dset.features)
            return {}
        label_list = self.dset[label_field]
        # Get the evaluate library's measurement for label distro.
        label_distribution = evaluate.load(EVAL_LABEL_MEASURE)
        # Measure the label distro.
        label_measurement = label_distribution.compute(data=label_list)
        # TODO: Incorporate this summation into what the evaluate library returns.
        # Per-label counts, listed in sorted order of the label values.
        label_sum_dict = Counter(label_list)
        label_sums = [label_sum_dict[key] for key in sorted(label_sum_dict)]
        label_measurement["sums"] = label_sums
        if not label_names:
            # Have to extract the label names from the Dataset object when the
            # actual dataset columns are just ints representing the label names.
            label_names = extract_label_names(label_field, self.ds_name,
                                              self.config_name)
        return make_label_results_dict(label_measurement, label_names)