-
Notifications
You must be signed in to change notification settings - Fork 51
/
cross_validation2.py
89 lines (73 loc) · 3.16 KB
/
cross_validation2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import numpy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
def _score_models(models, X, y, scoring, n_splits=5):
    """Cross-validate every (name, estimator) pair and print one line per model.

    Args:
        models: iterable of ``(name, estimator)`` tuples.
        X: feature matrix used for cross-validation.
        y: target vector aligned with ``X``.
        scoring: scoring identifier forwarded to ``cross_val_score``.
        n_splits: number of folds.

    Returns:
        List with the mean cross-validation score of each model, in input order.
    """
    # No random_state here: KFold only uses it when shuffle=True, and recent
    # scikit-learn versions raise ValueError if it is set while shuffle=False.
    # Leaving it out keeps the original (unshuffled, contiguous) folds.
    kfold = model_selection.KFold(n_splits=n_splits)

    mean_scores = []
    for name, model in models:
        start_time = time.time()
        cv_results = model_selection.cross_val_score(
            model, X, y, cv=kfold, scoring=scoring)
        elapsed = time.time() - start_time
        print("%s: %f (%f) %f s"
              % (name, cv_results.mean(), cv_results.std(), elapsed))
        mean_scores.append(cv_results.mean())
    return mean_scores


def cross_validate2():
    """Compare classifiers with 5-fold cross-validation, then sweep KNN's k.

    Steps:
      1. Load features and labels from the space-delimited files under
         ``data/``, standardise the features and make a 70/30 split.
      2. Fit a 20-component PCA on the training split and project both splits.
      3. Cross-validate LR, SVC (RBF), KNN (k=5) and a decision tree on the
         training split, printing mean accuracy, std. dev. and wall time.
      4. Repeat the cross-validation for KNN with k = 1..40, print the best
         mean accuracy and draw mean accuracy against k.

    The figure is created but neither shown nor saved here; the caller has to
    call ``plt.show()`` / ``plt.savefig()`` if it should be displayed or kept.

    Returns:
        None. Results are reported through ``print`` and a matplotlib figure.
    """
    # Imported locally, as in the original, so the block stays self-contained.
    from sklearn.decomposition import PCA

    # Get data
    file_x = 'data/features_sampled.dat'
    file_y = 'data/label_class_2.dat'
    X = numpy.genfromtxt(file_x, delimiter=' ')
    y = numpy.genfromtxt(file_y, delimiter=' ')

    # NOTE(review): the scaler is fitted on the full dataset before splitting,
    # so statistics of the held-out rows influence the training features.
    # Kept as-is to preserve the reported numbers; consider fitting on
    # X_train only (or using a Pipeline inside cross_val_score).
    X = StandardScaler().fit_transform(X)

    # Split the data into training/testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.30, random_state=42)

    # Principal component analysis: fit on the training split only and reuse
    # the same projection for the test split. Re-fitting on X_test would put
    # the two splits in different, incomparable component spaces.
    pca = PCA(n_components=20)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)  # projected for consistency; not evaluated below

    scoring = 'accuracy'

    # Cross-validate the baseline classifiers
    models = [
        ('LR', LogisticRegression(random_state=42)),
        ('SVC', SVC(kernel='rbf', random_state=42)),
        ('KNN', KNeighborsClassifier(n_neighbors=5)),
        ('DT', DecisionTreeClassifier(random_state=42)),
    ]
    print('Model | Mean of CV | Std. Dev. of CV | Time')
    _score_models(models, X_train, y_train, scoring)

    # Sweep the number of neighbours for KNN
    k_values = range(1, 41)
    knn_models = [('KNN', KNeighborsClassifier(n_neighbors=k)) for k in k_values]
    print('Model | Mean of CV | Std. Dev. of CV | Time')
    cv_knn = _score_models(knn_models, X_train, y_train, scoring)
    print('\nmaximum accuracy for dominance is', max(cv_knn))

    plt.figure(figsize=(12, 6))
    plt.plot(k_values, cv_knn, color='red', linestyle='dashed', marker='o',
             markerfacecolor='blue', markersize=10)
    plt.title('mean cv_results of K Value for dominance')
    plt.xlabel('K Value')
    plt.ylabel('Mean cv_result')
if __name__ == "__main__":
    # Run the experiment only when executed as a script, not when imported.
    cross_validate2()