parsers/MovieReview/MovieReview_Finetune_Preprocess.py [133:188]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        example_target = example.target

        # get the sentences
        sentences = example.text_a  # text_b always None

        # instantiate a list of features, one per sentence
        example_features = []

        # The parsed example, with <pos> and <neg> tags recombined across all sentences
        parsed_example = []

        for sentence in sentences:
            tokens = tokenizer.tokenize(sentence)

            tokens_sentence = []  # the tokens with "corrected" annotation placeholders

            # ------ Finite State Machine to replace specific substrings  ------ #
            left_out_tokens = []
            possible_match = False

            for token in tokens:
                if not possible_match:
                    if token == '<':
                        possible_match = True  # start tracking possible tag
                        left_out_tokens.append(token)
                    else:
                        parsed_example.append(token)
                else:
                    if left_out_tokens == ['<'] and token in ['/', 'ne', 'po'] or \
                            left_out_tokens == ['<', '/'] and token in ['ne', 'po'] or \
                            left_out_tokens == ['<', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', 'ne'] and token == '##g' or \
                            left_out_tokens == ['<', '/', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', '/', 'ne'] and token == '##g':
                        left_out_tokens.append(token)
                    elif left_out_tokens == ['<', '/', 'po', '##s'] and token == '>':
                        parsed_example.append('</pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'po', '##s'] and token == '>':
                        parsed_example.append('<pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', '/', 'ne', '##g'] and token == '>':
                        parsed_example.append('</neg>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'ne', '##g'] and token == '>':
                        parsed_example.append('<neg>')
                        possible_match = False
                        left_out_tokens = []
                    else:
                        # not a tag after all: flush the buffered tokens and
                        # reprocess the current token instead of dropping it
                        parsed_example.extend(left_out_tokens)
                        if token == '<':
                            left_out_tokens = [token]  # '<' may start a new tag
                        else:
                            possible_match = False
                            left_out_tokens = []
                            parsed_example.append(token)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
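
Both excerpts implement the same recombination step: a BERT-style WordPiece
tokenizer splits the annotation tags <pos>, </pos>, <neg>, </neg> into pieces
such as ['<', 'po', '##s', '>'], and the state machine above glues them back
together. Below is a minimal standalone sketch of that logic, assuming the
tokenizer always splits the tags exactly as the hard-coded comparisons imply;
the TAG_PIECES table, the recombine_tags helper, and the sample token list
are illustrative, not code from either parser:

    # Map the piece sequences a WordPiece tokenizer produces for each
    # annotation tag back to the original tag string (assumed split).
    TAG_PIECES = {
        ('<', 'po', '##s', '>'): '<pos>',
        ('<', '/', 'po', '##s', '>'): '</pos>',
        ('<', 'ne', '##g', '>'): '<neg>',
        ('<', '/', 'ne', '##g', '>'): '</neg>',
    }
    # Every proper prefix of a tag sequence is a state worth buffering in.
    PREFIXES = {seq[:i] for seq in TAG_PIECES for i in range(1, len(seq))}

    def recombine_tags(tokens):  # hypothetical helper, not in either file
        out, buf = [], []
        for token in tokens:
            candidate = tuple(buf) + (token,)
            if candidate in TAG_PIECES:    # completed a tag: emit it whole
                out.append(TAG_PIECES[candidate])
                buf = []
            elif candidate in PREFIXES:    # still a possible tag: buffer
                buf.append(token)
            else:                          # dead end: flush, keep the token
                out.extend(buf)
                buf = [token] if token == '<' else []
                if token != '<':
                    out.append(token)
        out.extend(buf)                    # flush a trailing partial tag
        return out

    # Hand-written WordPiece-style tokens for "<pos> great </pos>"
    print(recombine_tags(['<', 'po', '##s', '>', 'great',
                          '<', '/', 'po', '##s', '>']))
    # -> ['<pos>', 'great', '</pos>']

The table-driven form makes the accepted piece sequences explicit in one
place, whereas the chained elif version in the excerpts has to enumerate
every prefix/terminator combination by hand.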



parsers/Spouse/Spouse_Finetune_Preprocess.py [99:154]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        example_target = example.target

        # get the sentences
        sentences = example.text_a  # text_b always None

        # instantiate a list of features, one per sentence
        example_features = []

        # The parsed example, with <pos> and <neg> tags recombined across all sentences
        parsed_example = []

        for sentence in sentences:
            tokens = tokenizer.tokenize(sentence)

            tokens_sentence = []  # the tokens with "corrected" annotation placeholders

            # ------ Finite State Machine to replace specific substrings  ------ #
            left_out_tokens = []
            possible_match = False

            for token in tokens:
                if not possible_match:
                    if token == '<':
                        possible_match = True  # start tracking possible tag
                        left_out_tokens.append(token)
                    else:
                        parsed_example.append(token)
                else:
                    if left_out_tokens == ['<'] and token in ['/', 'ne', 'po'] or \
                            left_out_tokens == ['<', '/'] and token in ['ne', 'po'] or \
                            left_out_tokens == ['<', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', 'ne'] and token == '##g' or \
                            left_out_tokens == ['<', '/', 'po'] and token == '##s' or \
                            left_out_tokens == ['<', '/', 'ne'] and token == '##g':
                        left_out_tokens.append(token)
                    elif left_out_tokens == ['<', '/', 'po', '##s'] and token == '>':
                        parsed_example.append('</pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'po', '##s'] and token == '>':
                        parsed_example.append('<pos>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', '/', 'ne', '##g'] and token == '>':
                        parsed_example.append('</neg>')
                        possible_match = False
                        left_out_tokens = []
                    elif left_out_tokens == ['<', 'ne', '##g'] and token == '>':
                        parsed_example.append('<neg>')
                        possible_match = False
                        left_out_tokens = []
                    else:
                        # not a tag after all: flush the buffered tokens and
                        # reprocess the current token instead of dropping it
                        parsed_example.extend(left_out_tokens)
                        if token == '<':
                            left_out_tokens = [token]  # '<' may start a new tag
                        else:
                            possible_match = False
                            left_out_tokens = []
                            parsed_example.append(token)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
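
The two excerpts above are token-for-token identical; a shared helper along
the lines of the recombine_tags sketch shown after the first excerpt would
let both parsers call one implementation instead of carrying two copies.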



