forked from lukas/ml-class
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest-algorithm-dummy.py
32 lines (22 loc) · 924 Bytes
/
test-algorithm-dummy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""Most-frequent-class baseline for the brand-emotion tweet data.

Reads ``tweets.csv``, drops rows whose tweet text is missing, turns the
remaining text into bag-of-words counts, fits a
``DummyClassifier(strategy='most_frequent')`` on the leading 70% of the rows
(in file order, no shuffling) and prints its accuracy on the remaining rows.
"""
import numpy as np
import pandas as pd

CSV_PATH = 'tweets.csv'
TEXT_COLUMN = 'tweet_text'
TARGET_COLUMN = 'is_there_an_emotion_directed_at_a_brand_or_product'
PROP_TRAIN = 0.7


def split_sizes(n_examples, prop_train):
    """Return ``(n_train, n_test)`` for a leading/trailing split.

    ``n_train`` is ``n_examples * prop_train`` rounded up; ``n_test`` is
    whatever is left, so it can be 0 for very small inputs.
    """
    n_train = int(np.ceil(n_examples * prop_train))
    return n_train, n_examples - n_train


def accuracy(predictions, actual):
    """Return the fraction of ``predictions`` equal to ``actual``.

    The two sequences are compared element by element in positional order
    (any pandas index on either argument is ignored).

    Raises:
        ValueError: if ``actual`` is empty, because the fraction is undefined.
    """
    actual = np.asarray(actual)
    if actual.size == 0:
        raise ValueError('cannot compute accuracy on an empty test set')
    hits = np.count_nonzero(np.asarray(predictions) == actual)
    return int(hits) / actual.size


def main(csv_path=CSV_PATH, prop_train=PROP_TRAIN):
    """Fit the baseline on ``csv_path`` and print the split sizes and accuracy."""
    # scikit-learn is imported lazily so that split_sizes() and accuracy()
    # stay importable on machines that do not have it installed.
    from sklearn.dummy import DummyClassifier
    from sklearn.feature_extraction.text import CountVectorizer
    # Not used by this baseline; retained from the original script.
    from sklearn.naive_bayes import MultinomialNB  # noqa: F401

    df = pd.read_csv(csv_path)

    # Only the text column is checked for nulls; the same mask is applied to
    # the labels so the two stay aligned row for row.
    has_text = pd.notnull(df[TEXT_COLUMN])
    fixed_text = df.loc[has_text, TEXT_COLUMN]
    fixed_target = df.loc[has_text, TARGET_COLUMN]

    # The vocabulary is learned from every row, train and test alike.
    counts = CountVectorizer().fit_transform(fixed_text)

    n_train, n_test = split_sizes(fixed_target.shape[0], prop_train)
    print('training on {} examples ({:.1%})'.format(n_train, prop_train))
    print('testing on {} examples'.format(n_test))

    # .iloc keeps the split positional: the index has gaps once rows with
    # missing text have been filtered out.
    baseline = DummyClassifier(strategy='most_frequent')
    baseline.fit(counts[:n_train], fixed_target.iloc[:n_train])
    predictions = baseline.predict(counts[n_train:])
    print(accuracy(predictions, fixed_target.iloc[n_train:]))


if __name__ == '__main__':
    main()