in torchmoji/word_generator.py [0:0]
def validated_tweet(self, data):
    ''' Decide whether a tweet record may be used and collect its emojis.

    # Arguments:
        data: Indexable tweet record. The tweet text is read from
            index 9; index 1 is handed to `non_english_user`
            (presumably a user identifier - confirm against the caller).

    # Returns:
        Tuple `(valid, emojis)`. A rejected tweet always yields
        `(False, [])`. An accepted tweet yields `True` together with the
        result of `np.unique` over the extracted emojis when
        `self.wanted_emojis` is set, or an empty list when it is None.
    '''
    # The filters run in a fixed order (kept deliberately, for speed);
    # the first one that fails rejects the tweet.
    text_field = 9
    user_field = 1

    # Records too short to contain the text field are incomplete.
    if len(data) <= text_field:
        return False, []

    tweet_text = data[text_field]

    # Each pattern is searched only when its switch is enabled, and the
    # `or` chain stops at the first filter that matches.
    filtered_out = (
        (self.ignore_retweets and RETWEETS_RE.search(tweet_text))
        or (self.ignore_url_tweets and URLS_RE.search(tweet_text))
        or (self.ignore_mention_tweets and MENTION_RE.search(tweet_text))
    )
    if filtered_out:
        return False, []

    if self.wanted_emojis is None:
        # No emoji restriction configured: nothing to extract.
        found_emojis = []
    else:
        found_emojis = np.unique(
            extract_emojis(tweet_text, self.wanted_emojis))
        # A tweet without any of the wanted emojis is of no use.
        if len(found_emojis) == 0:
            return False, []

    # The user filter runs last and only when a user set is configured.
    if self.non_english_user_set is None:
        return True, found_emojis
    if non_english_user(data[user_field], self.non_english_user_set):
        return False, []
    return True, found_emojis