parsers/Hatespeech/Hatespeech_Preprocess.py [93:123]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Replace every example's 'sample' text with a list of sentence strings.
        # Each sentence is its tokens' `.text` values joined by single spaces.
        # NOTE(review): `pipeline` looks like a spaCy pipeline (its tokens expose
        # `is_sent_start` and `text`) -- confirm against the enclosing function.
        idx = 0  # keeps `idx` bound (as 0) even when `dataset` yields nothing
        for idx, example in enumerate(dataset, start=1):
            # Progress line every 100 samples; the trailing carriage return
            # moves the cursor back so the next update overwrites this one.
            if idx % 100 == 0:
                print(f'Parsing sample no {idx}', end='\r')

            doc = pipeline(example['sample'])

            sentences = []
            current = []
            seen_start = False

            for token in doc:
                if token.is_sent_start:
                    # Only the second and later sentence starts close a
                    # sentence; the first one merely opens it, so any tokens
                    # seen before it stay in the first sentence.
                    if seen_start:
                        sentences.append(" ".join(current))
                        current = []
                    seen_start = True
                current.append(token.text)

            # Flush the trailing sentence. For a document with no tokens this
            # appends a single empty string.
            sentences.append(" ".join(current))
            example['sample'] = sentences

        # End the progress line with a newline.
        print('')
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


parsers/MovieReview/MovieReview_Finetune_Preprocess.py [85:115]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Turn each example's 'sample' text into a list of sentences, where a
        # sentence is the space-joined `.text` of its tokens.
        # NOTE(review): `pipeline` is presumably a spaCy pipeline (tokens carry
        # `is_sent_start` and `text`) -- verify in the enclosing function.
        idx = 0  # stays 0, and stays defined, if `dataset` is empty
        for idx, example in enumerate(dataset, start=1):
            # Report progress every 100 samples; the carriage return lets the
            # next report overwrite the current console line.
            if idx % 100 == 0:
                print(f'Parsing sample no {idx}', end='\r')

            doc = pipeline(example['sample'])

            sentences = []
            current = []
            seen_start = False

            for token in doc:
                if token.is_sent_start:
                    # The first sentence start has nothing to close; every
                    # later one flushes the sentence collected so far.
                    if seen_start:
                        sentences.append(" ".join(current))
                        current = []
                    seen_start = True
                current.append(token.text)

            # The last sentence is never followed by another start, so flush
            # it here (this yields [''] when the document has no tokens).
            sentences.append(" ".join(current))
            example['sample'] = sentences

        # Finish the progress line.
        print('')
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -