in spamfilter.py [0:0]
def reload_spamdb(self):
    """Rebuild ``corpus/spamdb.json`` from two public spam/ham datasets.

    Downloads two JSON training sets from raw.githubusercontent.com,
    tokenizes every message with ``self.tokenify`` and appends the
    resulting words to ``self.spam_words`` or ``self.ham_words``
    depending on the label of the message. Both lists are then written
    to ``corpus/spamdb.json`` as ``{"spam": [...], "ham": [...]}``.

    Expected payload layouts (as read by this method):

    * first dataset: a list of objects with ``"Text"`` and ``"Class"``
      keys, where ``"Class" == "spam"`` marks spam;
    * second dataset: an object whose ``"training_data"`` list holds
      objects with ``"text"`` and ``"classes"`` keys, where ``"spam"``
      being in ``"classes"`` marks spam.

    Raises:
        requests.RequestException: if a download times out, fails, or
            returns an HTTP error status.
        ValueError: if a payload is not valid JSON.
        OSError: if ``corpus/spamdb.json`` cannot be written.
    """
    # NOTE(review): words are appended to the existing lists rather than
    # replacing them, so calling this twice on the same instance stores
    # every word twice. Confirm whether the lists should be reset first.

    # Without a timeout requests waits forever on a stalled connection.
    timeout = 30  # seconds

    response = requests.get(
        "https://raw.githubusercontent.com/zmohammad01/nlc-email-spam/master/data/Email-testingdata.json",
        timeout=timeout,
    )
    # Fail on 4xx/5xx here instead of trying to parse an error page as JSON.
    response.raise_for_status()
    for corpus in response.json():
        words = self.tokenify(corpus["Text"])
        if corpus["Class"] == "spam":
            self.spam_words.extend(words)
        else:
            self.ham_words.extend(words)

    response = requests.get(
        "https://raw.githubusercontent.com/cdimascio/watson-nlc-spam/master/data/SpamHam-Train.json",
        timeout=timeout,
    )
    response.raise_for_status()
    # This method used to drop the last two characters of the payload
    # before parsing, which implies the upstream file carries trailing
    # junk after the JSON object. raw_decode parses the first complete
    # JSON value and ignores whatever follows, so the result is the same
    # without depending on the exact length of that junk. It does not skip
    # leading whitespace on its own, hence the lstrip().
    spamdb, _ = json.JSONDecoder().raw_decode(response.text.lstrip())
    for corpus in spamdb["training_data"]:
        words = self.tokenify(corpus["text"])
        if "spam" in corpus["classes"]:
            self.spam_words.extend(words)
        else:
            self.ham_words.extend(words)

    # The with-statement closes the file; no explicit close() is needed.
    with open("corpus/spamdb.json", "w", encoding="utf-8") as f:
        json.dump({"spam": self.spam_words, "ham": self.ham_words}, f)