parsers/MovieReview/MovieReview_Preprocess.py [250:288]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    # Close an open positive-annotation span; a negative span
                    # must not be open at the same time.
                    assert not annotate_as_neg
                    annotate_as_pos = False
                else:
                    # Ordinary token: label it 1 if it falls inside an
                    # annotated span, 0 otherwise, and keep it in the input.
                    if annotate_as_neg or annotate_as_pos:
                        annotations.append(1)
                    else:
                        annotations.append(0)
                    tokens.append(token)
                    input_type_ids.append(0)

            # Terminate the sentence with a final [SEP]; it carries no annotation.
            tokens.append("[SEP]")
            input_type_ids.append(0)
            annotations.append(0)

            sentences.append((tokens, annotations, input_type_ids))

        # Store the processed sentences.

        # We also keep a per-sentence ID; it may be useful downstream.
        sentence_unique_id = example_unique_id

        for tokens, annotations, input_type_ids in sentences:

            # Map the tokens to vocabulary IDs: the actual input BERT consumes.
            input_ids = tokenizer.convert_tokens_to_ids(tokens)

            # The mask has 1 for real tokens and 0 for padding tokens. Only real
            # tokens are attended to.
            input_mask = [1] * len(input_ids)

            # Zero-pad up to the sequence length.
            while len(input_ids) < seq_length:
                input_ids.append(0)
                input_mask.append(0)
                input_type_ids.append(0)

            assert len(input_ids) == seq_length
            assert len(input_mask) == seq_length
            assert len(input_type_ids) == seq_length
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
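
The excerpt converts a token sequence into the three parallel lists BERT
expects: vocabulary IDs, an attention mask, and segment type IDs, all
zero-padded to seq_length. Below is a minimal, self-contained sketch of that
logic; the vocab dict and the build_bert_inputs helper are illustrative
stand-ins for the real tokenizer.convert_tokens_to_ids from the BERT codebase.

    def build_bert_inputs(tokens, vocab, seq_length):
        """Convert tokens to IDs, then zero-pad IDs, mask, and type IDs."""
        input_ids = [vocab[t] for t in tokens]  # stand-in for convert_tokens_to_ids
        input_mask = [1] * len(input_ids)       # 1 = real token, 0 = padding
        input_type_ids = [0] * len(input_ids)   # single-sentence input: all zeros
        while len(input_ids) < seq_length:
            input_ids.append(0)
            input_mask.append(0)
            input_type_ids.append(0)
        assert len(input_ids) == len(input_mask) == len(input_type_ids) == seq_length
        return input_ids, input_mask, input_type_ids

    vocab = {"[CLS]": 101, "great": 2307, "movie": 3185, "[SEP]": 102}
    print(build_bert_inputs(["[CLS]", "great", "movie", "[SEP]"], vocab, 8))
    # -> ([101, 2307, 3185, 102, 0, 0, 0, 0],
    #     [1, 1, 1, 1, 0, 0, 0, 0],
    #     [0, 0, 0, 0, 0, 0, 0, 0])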



parsers/Spouse/Spouse_Preprocess.py [329:367]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    # Close an open positive-annotation span; a negative span
                    # must not be open at the same time.
                    assert not annotate_as_neg
                    annotate_as_pos = False
                else:
                    # Ordinary token: label it 1 if it falls inside an
                    # annotated span, 0 otherwise, and keep it in the input.
                    if annotate_as_neg or annotate_as_pos:
                        annotations.append(1)
                    else:
                        annotations.append(0)
                    tokens.append(token)
                    input_type_ids.append(0)

            # Terminate the sentence with a final [SEP]; it carries no annotation.
            tokens.append("[SEP]")
            input_type_ids.append(0)
            annotations.append(0)

            sentences.append((tokens, annotations, input_type_ids))

        # Store the processed sentences.

        # We also keep a per-sentence ID; it may be useful downstream.
        sentence_unique_id = example_unique_id

        for tokens, annotations, input_type_ids in sentences:

            # Map the tokens to vocabulary IDs: the actual input BERT consumes.
            input_ids = tokenizer.convert_tokens_to_ids(tokens)

            # The mask has 1 for real tokens and 0 for padding tokens. Only real
            # tokens are attended to.
            input_mask = [1] * len(input_ids)

            # Zero-pad up to the sequence length.
            while len(input_ids) < seq_length:
                input_ids.append(0)
                input_mask.append(0)
                input_type_ids.append(0)

            assert len(input_ids) == seq_length
            assert len(input_mask) == seq_length
            assert len(input_type_ids) == seq_length
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
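
The two excerpts are line-for-line identical, so the duplicated block is a
natural candidate for extraction into a shared helper that both preprocessors
import. A possible refactor is sketched below; the module path
parsers/common.py and the name pad_bert_inputs are hypothetical, not taken
from the repository.

    # parsers/common.py (hypothetical location)

    def pad_bert_inputs(input_ids, input_mask, input_type_ids, seq_length):
        """Zero-pad the three parallel BERT input lists in place up to seq_length."""
        while len(input_ids) < seq_length:
            input_ids.append(0)
            input_mask.append(0)
            input_type_ids.append(0)
        assert len(input_ids) == seq_length
        assert len(input_mask) == seq_length
        assert len(input_type_ids) == seq_length

    # MovieReview_Preprocess.py and Spouse_Preprocess.py could then both call:
    #     pad_bert_inputs(input_ids, input_mask, input_type_ids, seq_length)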



