def text_to_instance()

in curiosity/reader.py [0:0]


    def text_to_instance(self, row: List):
        """Build an ``Instance`` from a ``(fact, message)`` row.

        ``row[0]`` is used as the fact text and ``row[1]`` as the paraphrased
        message. The fact becomes ``source_tokens`` and the message becomes
        ``target_tokens``. Dialog acts, sender and metadata are filled with
        fixed placeholder values.
        """
        max_fact_chars = 300
        max_tokens = 150

        def wrap_tokens(text):
            # Tokenize, keep at most ``max_tokens`` tokens, and surround the
            # result with the start/end symbols.
            return (
                [Token(START_SYMBOL)]
                + self._tokenizer.tokenize(text)[:max_tokens]
                + [Token(END_SYMBOL)]
            )

        # Fact: clip by character count first; an empty fact is replaced by
        # a default symbol.
        clipped_fact = row[0][:max_fact_chars]
        fact_text = "@@NOFACT@@" if clipped_fact == "" else clipped_fact
        source = wrap_tokens(fact_text)

        # Paraphrased message: no character clipping, only the token limit.
        target = wrap_tokens(row[1])

        # Dialog acts, sender and metadata are constant placeholders here.
        fields = {
            "source_tokens": TextField(source, self._token_indexers),
            "target_tokens": TextField(target, self._token_indexers),
            "dialog_acts": MultiLabelField(
                ["@@NODA@@"], label_namespace="dialog_acts"
            ),
            "sender": LabelField("teacher", label_namespace="sender"),
            "metadata": MetadataField({"dialog_id": -1, "n_message": -1}),
        }
        return Instance(fields)