in parlai/tasks/wizard_of_wikipedia/agents.py [0:0]
def __init__(self, opt, shared=None):
    """
    Initialize the teacher, count usable examples and set up text helpers.

    After delegating to the parent initializer, this counts the dialog
    entries that carry a usable ``checked_sentence`` (stored in
    ``self.num_exs``), builds the stop-word list (``self.stop_words``),
    loads the NLTK resource ``tokenizers/punkt/english.pickle`` into
    ``self.sent_tok`` and records ``opt.get('teacher_type')`` in
    ``self.teacher_type``.

    :param opt:
        options object; forwarded to the parent initializer and queried
        for the ``'teacher_type'`` key.
    :param shared:
        forwarded unchanged to the parent initializer.

    :raises ImportError:
        if ``nltk`` cannot be imported.
    """
    super().__init__(opt, shared)

    # get number of examples: an entry counts only when its
    # 'checked_sentence' is present, is not an empty dict and does not
    # contain TOKEN_NOCHOSEN.
    self.num_exs = 0
    for ep in range(self.num_episodes()):
        for entry in self.data[ep]['dialog']:
            # Look the value up once instead of once per condition.
            checked = entry.get('checked_sentence')
            if (
                checked is not None
                and checked != {}
                and TOKEN_NOCHOSEN not in checked
            ):
                self.num_exs += 1

    # NOTE(review): "''" appears twice below; kept as-is because the
    # intended contents cannot be confirmed from here -- possibly one of
    # them was meant to be a different quote token.
    self.stop_words = [
        'i',
        'a',
        'an',
        'am',
        'are',
        'about',
        'as',
        'at',
        'be',
        'by',
        'for',
        'from',
        'how',
        'in',
        'is',
        'it',
        'of',
        'on',
        'or',
        'that',
        'the',
        'this',
        'to',
        'was',
        'what',
        'when',
        'where',
        '--',
        '?',
        '.',
        "''",
        "''",
        "``",
        ',',
        'do',
        'see',
        'want',
        'people',
        'and',
        "n't",
        "me",
        'too',
        'own',
        'their',
        '*',
        "'s",
        'not',
        'than',
        'other',
        'you',
        'your',
        'know',
        'just',
        'but',
        'does',
        'really',
        'have',
        'into',
        'more',
        'also',
        'has',
        'any',
        'why',
        'will',
        'with',
        'well',
        'still',
        'he',
        'she',
        'we',
        'may',
        'these',
        'his',
        'hers',
        'which',
        'such',
        'they',
        'its',
        'were',
        'my',
        'there',
        ';',
        '-',
        ':',
        '|',
        '&',
        ')',
        '(',
    ]

    # nltk is imported here rather than at module level, so a missing
    # install only fails when this initializer runs.
    try:
        import nltk
    except ImportError as err:
        # Chain explicitly so the original import failure stays attached
        # as the cause of the friendlier message.
        raise ImportError('Please install nltk (e.g. pip install nltk).') from err

    # nltk-specific setup
    st_path = 'tokenizers/punkt/english.pickle'
    try:
        self.sent_tok = nltk.data.load(st_path)
    except LookupError:
        # Resource not found locally: download 'punkt' and retry once.
        nltk.download('punkt')
        self.sent_tok = nltk.data.load(st_path)

    self.teacher_type = opt.get('teacher_type')