def gold_aspect_spans_to_aspects_list()

in src/setfit/span/modeling.py [0:0]


    def gold_aspect_spans_to_aspects_list(
        self, inputs: Dataset
    ) -> "Tuple[List[Doc], List[List[slice]], List[int]]":
        """Convert gold aspect annotations into per-document token slices.

        Samples are grouped by their ``"text"`` value, every unique text is passed through
        ``self.aspect_extractor`` to obtain a spaCy doc, and each sample's ``"span"`` string
        is located in the doc text at its ``"ordinal"``-th (0-based) non-overlapping
        occurrence. The matched character range is then mapped onto the doc's tokens via
        ``doc.char_span``.

        Args:
            inputs: Iterable of mapping-like samples, each providing the keys ``"text"``,
                ``"span"`` and ``"ordinal"``. The samples are read but not modified.

        Returns:
            A 3-tuple ``(docs, aspects_list, skipped_indices)``:

            * ``docs``: the docs returned by ``self.aspect_extractor``, one per unique text,
              in the order the texts were first seen.
            * ``aspects_list``: one list per doc holding a ``slice(start, end)`` of token
              indices for every sample that could be matched.
            * ``skipped_indices``: positions of the samples that could not be converted,
              either because the span does not occur at the requested ordinal or because
              the occurrence does not line up with token boundaries. Positions are counted
              in grouped order (texts in first-seen order, then samples within a text);
              this equals the input order only when samples sharing a text are adjacent.
        """
        # NOTE: the return annotation is quoted on purpose so that it is never evaluated at
        # definition time and therefore cannot fail on a name that is missing at runtime.

        # Group the samples by text. Dicts keep insertion order, so the unique texts (and
        # with them the docs below) stay in first-seen order.
        grouped_data = defaultdict(list)
        for sample in inputs:
            # Read the text instead of popping it, so caller-owned samples stay intact.
            grouped_data[sample["text"]].append(sample)

        # Get the spaCy docs, one per unique text
        docs, _ = self.aspect_extractor(grouped_data.keys())

        # Get the aspect spans for each doc by matching gold spans to the spaCy tokens
        aspects_list = []
        skipped_indices = []
        index = -1
        for doc, samples in zip(docs, grouped_data.values()):
            doc_aspects = []
            aspects_list.append(doc_aspects)
            for sample in samples:
                index += 1

                # Find the ordinal-th occurrence of the literal span text and stop scanning
                # as soon as it has been found.
                match = None
                for i, candidate in enumerate(re.finditer(re.escape(sample["span"]), doc.text)):
                    if i == sample["ordinal"]:
                        match = candidate
                        break

                if match is None:
                    # Previously such samples vanished without a trace; record them so that
                    # every sample ends up either in aspects_list or in skipped_indices.
                    logger.warning(
                        f"Aspect term {sample['span']!r} with ordinal {sample['ordinal']}, could not be found in {doc.text!r}. "
                        "Skipping this sample."
                    )
                    skipped_indices.append(index)
                    continue

                # char_span yields None when the character range does not align with
                # spaCy's token boundaries.
                span = doc.char_span(match.start(), match.end())
                if span is None:
                    logger.warning(
                        f"Aspect term {sample['span']!r} with ordinal {sample['ordinal']}, isn't a token in {doc.text!r} according to spaCy. "
                        "Skipping this sample."
                    )
                    skipped_indices.append(index)
                    continue

                doc_aspects.append(slice(span.start, span.end))
        return docs, aspects_list, skipped_indices