def process_chunk()

in kilt/datasets/fact_verification.py [0:0]
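
Processes one chunk of FEVER datapoints: each evidence sentence is looked up in the knowledge source `ks` via its Wikipedia URL and aligned to a paragraph/character span, yielding KILT-format records with provenance plus `[missing_pages, exact_match, fuzzy_match]` counters.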


    def process_chunk(self, chunk, ks, chunk_id=-1):
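        # NOTE: assumes module-level `import sys` and the repo's `utils`
        # module (providing utils.match_answer), plus the instance's
        # self.nlp pipeline and self._normalize URL helper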
        # running counters for the progress line and the returned metadata
        missing_pages = 0.0
        exact_match = 0.0
        fuzzy_match = 0.0
        n = len(chunk)
        kilt_data = []

        for idx, datapoint in enumerate(chunk):
            # one-line progress report: chunk id, percent done, and counters
            print(
                "t: {}, p: {:.2f} %, mp: {:.1f}, exact: {:.1f}, fuzzy: {:.1f}".format(
                    chunk_id,
                    idx * 100 / n,
                    missing_pages,
                    exact_match,
                    fuzzy_match,
                ),
                end="\r",
                flush=True,
            )

            page_id = datapoint["page_id"]
            sent_id = datapoint["sent_id"]
            text = datapoint["text"]

            if not text:
                # skip datapoints with empty evidence text
                continue

            url = "https://en.wikipedia.org/wiki/" + self._normalize(
                datapoint["page_id"]
            )
            page = ks.get_page_from_url(url)
            if not page:
                missing_pages += 1
            else:
                # get and validate the evidence sentence against the page

                # per-datapoint exact-match / fuzzy-match counters
                local_sem = 0.0
                local_sfm = 0.0

                kilt_record = {
                    # FEVER identifiers and evidence text for this datapoint
                    "page_id": page_id,
                    "sentence_id": sent_id,
                    "evidence_text": text,
                }

                kilt_record_output = []

                # align the evidence text to a span in the retrieved page:
                # returns the paragraph id, the character span within it, and
                # a BLEU score (1.0 for an exact match, lower for fuzzy ones)
                paragraph_id, start_character, end_character, bleu = utils.match_answer(
                    text, page, nlp=self.nlp, debug=False
                )

                kilt_record_output.append(
                    {
                        # answer in textual form
                        "answer": text,
                        "provenance": [
                            # list of relevant WikipediaPages / Spans as provenance for the answer from the ks
                            {
                                "wikipedia_id": page[
                                    "wikipedia_id"
                                ],  # *mandatory* - ID Wikipedia Page
                                "title": page[
                                    "wikipedia_title"
                                ],  # *mandatory* - Title Wikipedia Page
                                "start_paragraph_id": paragraph_id,  # start paragraph id with relevant info
                                "start_character": start_character,
                                "end_paragraph_id": paragraph_id,  # end paragraph id
                                "end_character": end_character,
                                "bleu_score": bleu,  # 1.0 when gold data is exactly matched, lower for fuzzy matches
                                "meta": {  # dataset/task specific
                                    "fever_page_id": page_id,
                                    "fever_sentence_id": sent_id,
                                },
                            }
                        ],
                    }
                )

                if bleu == 1:
                    local_sem += 1
                elif 0 <= bleu < 1:
                    local_sfm += 1
                else:
                    print("ERROR: invalid bleu: {}".format(bleu))
                    sys.exit(-1)

                # update kilt data
                kilt_record["output"] = kilt_record_output
                kilt_data.append(kilt_record)

                exact_match += local_sem
                fuzzy_match += local_sfm

        # aggregate counters returned alongside the KILT records; computed
        # after the loop so the final missing_pages count is always included
        metadata = [missing_pages, exact_match, fuzzy_match]

        return kilt_data, metadata
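
A minimal driver sketch for context, not the repo's actual entry point: `run_chunks`, the chunking scheme, and the sample datapoint are illustrative assumptions; the `KnowledgeSource` import path is assumed, while `process_chunk`'s signature and the datapoint fields ("page_id", "sent_id", "text") follow the code above.

    from kilt.knowledge_source import KnowledgeSource  # assumed import path


    def run_chunks(dataset, data, num_chunks=4):
        # shared knowledge source exposing get_page_from_url()
        ks = KnowledgeSource()

        # split datapoints into roughly equal chunks (ceiling division)
        size = max(1, -(-len(data) // num_chunks))
        chunks = [data[i : i + size] for i in range(0, len(data), size)]

        all_records = []
        for chunk_id, chunk in enumerate(chunks):
            kilt_data, metadata = dataset.process_chunk(chunk, ks, chunk_id=chunk_id)
            all_records.extend(kilt_data)
            missing, exact, fuzzy = metadata
            print("\nchunk {}: missing={} exact={} fuzzy={}".format(
                chunk_id, missing, exact, fuzzy))
        return all_records


    # each datapoint carries exactly the fields process_chunk reads, e.g.:
    # {"page_id": "Some_Wikipedia_Page", "sent_id": 0, "text": "evidence sentence"}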