def process_chunk()

in kilt/datasets/triviaqa.py [0:0]


    def process_chunk(self, chunk, ks, chunk_id=-1):
        """Convert a chunk of datapoints into KILT records.

        For every (answer alias, entity-page title) pair of every datapoint
        the page is looked up in ``ks``; when it is found, the alias is
        aligned against the page with ``utils.match_answer`` and one KILT
        record is appended to the result.

        Args:
            chunk: sized sequence of datapoints. Each datapoint is read as a
                mapping with the keys ``"Answer"`` (holding ``"Aliases"`` and
                ``"NormalizedAliases"``), ``"Question"``, ``"EntityPages"``
                (items carrying ``"Title"``) and ``"QuestionId"``.
            ks: knowledge source exposing ``get_pages_by_title(title)``. A
                falsy result is counted as a missing page; otherwise the
                first element of the result is used as the page and its
                ``"wikipedia_id"`` / ``"wikipedia_title"`` entries are read.
            chunk_id: identifier shown in the progress line only.

        Returns:
            A ``(kilt_data, metadata)`` tuple. ``kilt_data`` is the list of
            generated records and ``metadata`` is ``[missing_pages]``, the
            number of (alias, title) pairs whose page could not be fetched.

        Raises:
            SystemExit: if ``utils.match_answer`` reports a BLEU score
                outside the closed interval [0, 1].
        """
        missing_pages = 0.0
        short_exact_match = 0.0
        short_fuzzy_match = 0.0
        n = len(chunk)
        kilt_data = []

        for idx, datapoint in enumerate(chunk):

            # Single-line progress report, overwritten in place via "\r".
            print(
                "t: {}, p: {:.2f} %, mp: {:.1f}, exact: {:.1f}, fuzzy: {:.1f}".format(
                    chunk_id,
                    round(idx * 100 / n, 2),
                    missing_pages,
                    short_exact_match,
                    short_fuzzy_match,
                ),
                end="\r",
            )
            sys.stdout.flush()

            # answer
            answers = datapoint["Answer"]["Aliases"]
            normalized_answers = datapoint["Answer"]["NormalizedAliases"]
            question = datapoint["Question"]
            wiki_titles = [
                entity_page["Title"] for entity_page in datapoint["EntityPages"]
            ]
            dataset_id = datapoint["QuestionId"]

            # The page lookup depends only on the title, so it is fetched at
            # most once per datapoint instead of once per (alias, title) pair.
            # NOTE(review): assumes titles are hashable (strings) and that
            # utils.match_answer does not mutate the page it is given - confirm.
            pages_by_title = {}

            # group by question,
            for answer in answers:
                for title in wiki_titles:
                    if title not in pages_by_title:
                        pages_by_title[title] = ks.get_pages_by_title(title)
                    pages = pages_by_title[title]

                    if not pages:
                        # Counted once per (alias, title) pair, so this metric
                        # is inflated relative to the number of distinct
                        # unfetchable pages.
                        missing_pages += 1
                        continue

                    page = pages[0]

                    (
                        paragraph_id,
                        start_character,
                        end_character,
                        bleu,
                    ) = utils.match_answer(answer, page, nlp=self.nlp, debug=False)

                    kilt_record_output = {
                        # answer in textual form
                        "answer": answer,
                        "provenance": [
                            # list of relevant WikipediaPages / Spans as provenance for the answer from the ks
                            {
                                # *mandatory* - ID Wikipedia Page
                                "wikipedia_id": page["wikipedia_id"],
                                # *mandatory* - Title Wikipedia Page
                                "title": page["wikipedia_title"],
                                # start paragraph id with relevant info
                                "start_paragraph_id": paragraph_id,
                                "start_character": start_character,
                                # end paragraph id
                                "end_paragraph_id": paragraph_id,
                                "end_character": end_character,
                                # 1.0 when gold data is exactly matched, lower for fuzzy matches
                                "bleu_score": bleu,
                                "normalized_aliases": normalized_answers,
                            }
                        ],
                    }

                    # Feed the running counters shown in the progress line
                    # (previously the tallies were kept in throw-away locals,
                    # so the progress line always reported 0.0).
                    if bleu == 1:
                        short_exact_match += 1
                    elif 0 <= bleu < 1:
                        short_fuzzy_match += 1
                    else:
                        print("ERROR: invalid bleu: {}".format(bleu))
                        sys.exit(-1)

                    # update kilt data
                    kilt_data.append(
                        {
                            # original data point id if available otherwise unique id
                            "id": dataset_id,
                            # question / claim / sentence
                            # dialogue history goes here
                            "input": question,
                            "output": kilt_record_output,
                        }
                    )

        metadata = [missing_pages]
        return kilt_data, metadata