-
Notifications
You must be signed in to change notification settings - Fork 10
/
5. spam.py
24 lines (17 loc) · 1.01 KB
/
5. spam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from spam_data import training_spam_docs, training_doc_tokens, training_labels, test_labels, test_spam_docs, training_docs, test_docs
from sklearn.naive_bayes import MultinomialNB
# Import CountVectorizer from sklearn:
from sklearn.feature_extraction.text import CountVectorizer
# Multinomial Naive Bayes model; it is fitted later on the count vectors.
spam_classifier = MultinomialNB()

# Bag-of-words encoder shared by the training and test documents.
bow_vectorizer = CountVectorizer()

# Fit the encoder on the training documents and encode them in one call.
training_vectors = bow_vectorizer.fit_transform(training_docs)

# Encode the test documents with the already-fitted encoder (no refit),
# so both matrices use the same feature columns.
test_vectors = bow_vectorizer.transform(test_docs)
def spam_or_not(label) -> str:
    """Return a human-readable name for a binary spam label.

    Args:
        label: Any value tested for truthiness; truthy means spam.

    Returns:
        "spam" when ``label`` is truthy, otherwise "not spam".
    """
    return "spam" if label else "not spam"
# Train the classifier on the vectorized training documents.
spam_classifier.fit(training_vectors, training_labels)

# NOTE: despite its name, `predictions` holds the accuracy returned by
# score() on the test set (a fraction, scaled to a percentage below).
# The name is kept so existing references to it keep working.
predictions = spam_classifier.score(test_vectors, test_labels)

# Labels the classifier actually assigns to each test document. The report
# must show these rather than the ground-truth test_labels, otherwise the
# "was classified as" sentences would describe the answer key instead of
# the model's output.
predicted_labels = spam_classifier.predict(test_vectors)

print(
    "The predictions for the test data were {0}% accurate.\n\n"
    "For example, '{1}' was classified as {2}.\n\n"
    "Meanwhile, '{3}' was classified as {4}.".format(
        predictions * 100,
        test_docs[7],
        spam_or_not(predicted_labels[7]),
        test_docs[15],
        spam_or_not(predicted_labels[15]),
    )
)