parsers/MovieReview/MovieReview_Finetune_Preprocess.py [133:188]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        example_target = example.target

        # get the sentences
        sentences = example.text_a  # text_b always None

        # instantiate a list of features, one per sentence
        example_features = []

        # The parsed example, with <pos> and <neg> tags recombined across all sentences
        parsed_example = []

        for sentence in sentences:
            tokens = tokenizer.tokenize(sentence)

            tokens_sentence = []  # the tokens with "corrected" annotation placeholders

            # ------ Finite State Machine to replace specific substrings  ------ #
            left_out_tokens = []
            possible_match = False

            for token in tokens:
                if not possible_match:
                    if token == '<':
                        possible_match = True  # start tracking possible tag
                        left_out_tokens.append(token)
                    else:
                        parsed_example.append(token)
                else:
                    if left_out_tokens == ['<'] and token in ['/', 'ne', 'po'] or \
                            left_out_tokens == ['<', '/'] and token in ['ne', 'po'] or \
                            left_out_tokens == ['<', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', 'ne'] and token == '##g' or \
                            left_out_tokens == ['<', '/', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', '/', 'ne'] and token == '##g':
                        left_out_tokens.append(token)
                    elif left_out_tokens == ['<', '/', 'po', '##s'] and token == '>':
                        parsed_example.append('</pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'po', '##s'] and token == '>':
                        parsed_example.append('<pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', '/', 'ne', '##g'] and token == '>':
                        parsed_example.append('</neg>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'ne', '##g'] and token == '>':
                        parsed_example.append('<neg>')
                        possible_match = False
                        left_out_tokens = []
                    else:
                        # not a tag after all: flush the buffered tokens and
                        # reprocess the current token instead of dropping it
                        parsed_example.extend(left_out_tokens)
                        if token == '<':
                            left_out_tokens = [token]  # '<' may start a new tag
                        else:
                            possible_match = False
                            left_out_tokens = []
                            parsed_example.append(token)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
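
Both excerpts implement the same recombination step: a BERT-style WordPiece
tokenizer splits the annotation tags <pos>, </pos>, <neg>, </neg> into pieces
such as ['<', 'po', '##s', '>'], and the state machine above glues them back
together. Below is a minimal standalone sketch of that logic, assuming the
tokenizer always splits the tags exactly as the hard-coded comparisons imply;
the TAG_PIECES table, the recombine_tags helper, and the sample token list
are illustrative, not code from either parser:

    # Map the piece sequences a WordPiece tokenizer produces for each
    # annotation tag back to the original tag string (assumed split).
    TAG_PIECES = {
        ('<', 'po', '##s', '>'): '<pos>',
        ('<', '/', 'po', '##s', '>'): '</pos>',
        ('<', 'ne', '##g', '>'): '<neg>',
        ('<', '/', 'ne', '##g', '>'): '</neg>',
    }
    # Every proper prefix of a tag sequence is a state worth buffering in.
    PREFIXES = {seq[:i] for seq in TAG_PIECES for i in range(1, len(seq))}

    def recombine_tags(tokens):  # hypothetical helper, not in either file
        out, buf = [], []
        for token in tokens:
            candidate = tuple(buf) + (token,)
            if candidate in TAG_PIECES:    # completed a tag: emit it whole
                out.append(TAG_PIECES[candidate])
                buf = []
            elif candidate in PREFIXES:    # still a possible tag: buffer
                buf.append(token)
            else:                          # dead end: flush, keep the token
                out.extend(buf)
                buf = [token] if token == '<' else []
                if token != '<':
                    out.append(token)
        out.extend(buf)                    # flush a trailing partial tag
        return out

    # Hand-written WordPiece-style tokens for "<pos> great </pos>"
    print(recombine_tags(['<', 'po', '##s', '>', 'great',
                          '<', '/', 'po', '##s', '>']))
    # -> ['<pos>', 'great', '</pos>']

The table-driven form makes the accepted piece sequences explicit in one
place, whereas the chained elif version in the excerpts has to enumerate
every prefix/terminator combination by hand.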



parsers/Spouse/Spouse_Finetune_Preprocess.py [99:154]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        example_target = example.target

        # get the sentences
        sentences = example.text_a  # text_b always None

        # instantiate a list of features, one per sentence
        example_features = []

        # The parsed example, with <pos> and <neg> tags recombined across all sentences
        parsed_example = []

        for sentence in sentences:
            tokens = tokenizer.tokenize(sentence)

            tokens_sentence = []  # the tokens with "corrected" annotation placeholders

            # ------ Finite State Machine to replace specific substrings  ------ #
            left_out_tokens = []
            possible_match = False

            for token in tokens:
                if not possible_match:
                    if token == '<':
                        possible_match = True  # start tracking possible tag
                        left_out_tokens.append(token)
                    else:
                        parsed_example.append(token)
                else:
                    if left_out_tokens == ['<'] and token in ['/', 'ne', 'po'] or \
                            left_out_tokens == ['<', '/'] and token in ['ne', 'po'] or \
                            left_out_tokens == ['<', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', 'ne'] and token == '##g' or \
                            left_out_tokens == ['<', '/', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', '/', 'ne'] and token == '##g':
                        left_out_tokens.append(token)
                    elif left_out_tokens == ['<', '/', 'po', '##s'] and token == '>':
                        parsed_example.append('</pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'po', '##s'] and token == '>':
                        parsed_example.append('<pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', '/', 'ne', '##g'] and token == '>':
                        parsed_example.append('</neg>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'ne', '##g'] and token == '>':
                        parsed_example.append('<neg>')
                        possible_match = False
                        left_out_tokens = []
                    else:
                        # not a tag after all: flush the buffered tokens and
                        # reprocess the current token instead of dropping it
                        parsed_example.extend(left_out_tokens)
                        if token == '<':
                            left_out_tokens = [token]  # '<' may start a new tag
                        else:
                            possible_match = False
                            left_out_tokens = []
                            parsed_example.append(token)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
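
The two excerpts above are token-for-token identical; a shared helper along
the lines of the recombine_tags sketch shown after the first excerpt would
let both parsers call one implementation instead of carrying two copies.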



