import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import (accuracy_score, average_precision_score,
                             classification_report, confusion_matrix,
                             f1_score, precision_score, recall_score,
                             roc_auc_score)

def accuracy(y, y_hat):
    """Fraction of predictions that match the true binary labels."""
    y_y_hat = list(zip(y, y_hat))
    tp = sum(1 for i in y_y_hat if i[0] == 1 and i[1] == 1)
    tn = sum(1 for i in y_y_hat if i[0] == 0 and i[1] == 0)
    return (tp + tn) / float(len(y_y_hat))

def f1(y, y_hat):
    """Harmonic mean of precision and recall."""
    # Local names renamed so they no longer shadow sklearn's
    # precision_score / recall_score imported above.
    p = precision(y, y_hat)
    r = recall(y, y_hat)
    return 2 * (p * r) / (p + r)

def precision(y, y_hat):
    """True positives over all predicted positives: TP / (TP + FP)."""
    y_y_hat = list(zip(y, y_hat))
    tp = sum(1 for i in y_y_hat if i[0] == 1 and i[1] == 1)
    fp = sum(1 for i in y_y_hat if i[0] == 0 and i[1] == 1)
    return tp / float(tp + fp)

def recall(y, y_hat):
    """True positives over all actual positives: TP / (TP + FN)."""
    y_y_hat = list(zip(y, y_hat))
    tp = sum(1 for i in y_y_hat if i[0] == 1 and i[1] == 1)
    fn = sum(1 for i in y_y_hat if i[0] == 1 and i[1] == 0)
    return tp / float(tp + fn)
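
# Illustrative sanity check for the hand-rolled metrics above (not part of the
# original module); the numbers follow directly from the definitions.
# With y_true = [1, 0, 1, 1, 0, 0] and y_pred = [1, 1, 1, 0, 0, 0] there are
# 2 TP, 1 FP, 1 FN, and 2 TN, so:
#     accuracy(y_true, y_pred)   # (2 + 2) / 6 ~ 0.667
#     precision(y_true, y_pred)  # 2 / (2 + 1) ~ 0.667
#     recall(y_true, y_pred)     # 2 / (2 + 1) ~ 0.667
#     f1(y_true, y_pred)         # harmonic mean of the two above ~ 0.667
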
def auc(X, y, model):
    """ROC AUC from the model's positive-class probabilities."""
    probs = model.predict_proba(X)[:, 1]
    return roc_auc_score(y, probs)

def aps(X, y, model):
    """Average precision from the model's positive-class probabilities."""
    probs = model.predict_proba(X)[:, 1]
    return average_precision_score(y, probs)
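
# Usage note (an assumption for illustration, not from the original file):
# auc() and aps() expect a classifier that is already fitted and exposes
# predict_proba, e.g.:
#     from sklearn.linear_model import LogisticRegression
#     clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
#     auc(X_val, y_val, clf)   # ROC AUC from positive-class probabilities
#     aps(X_val, y_val, clf)   # average precision from the same probabilities
# X_train / X_val / y_train / y_val are hypothetical names used only here.
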
def get_metrics(X_tr, y_tr, X_val, y_val, y_pred_tr, y_pred_val, model):
    """Print training/validation metrics and plot a labeled confusion matrix."""
    ac_tr = accuracy_score(y_tr, y_pred_tr)
    ac_val = accuracy_score(y_val, y_pred_val)
    f1_tr = f1_score(y_tr, y_pred_tr)
    f1_val = f1_score(y_val, y_pred_val)
    au_tr = auc(X_tr, y_tr, model)
    au_val = auc(X_val, y_val, model)
    rc_tr = recall_score(y_tr, y_pred_tr)
    rc_val = recall_score(y_val, y_pred_val)
    pr_tr = precision_score(y_tr, y_pred_tr)
    pr_val = precision_score(y_val, y_pred_val)
    aps_tr = aps(X_tr, y_tr, model)
    aps_val = aps(X_val, y_val, model)
    print('Training Accuracy: ', ac_tr)
    print('Validation Accuracy: ', ac_val)
    print('Training F1 Score: ', f1_tr)
    print('Validation F1 Score: ', f1_val)
    print('Training AUC Score: ', au_tr)
    print('Validation AUC Score: ', au_val)
    print('Training Recall Score: ', rc_tr)
    print('Validation Recall Score: ', rc_val)
    print('Training Precision Score: ', pr_tr)
    print('Validation Precision Score: ', pr_val)
    print('Training Average Precision Score: ', aps_tr)
    print('Validation Average Precision Score: ', aps_val)
    print('')
    print('Training Classification Report: ')
    print(classification_report(y_tr, y_pred_tr))
    print('')
    print('Validation Classification Report: ')
    print(classification_report(y_val, y_pred_val))
    # Confusion matrix heatmap annotated with cell name, count, and percentage.
    cnf = confusion_matrix(y_val, y_pred_val)
    group_names = ['TN', 'FP', 'FN', 'TP']
    group_counts = ['{0:0.0f}'.format(value) for value in cnf.flatten()]
    group_percentages = ['{0:.2%}'.format(value) for value in cnf.flatten() / np.sum(cnf)]
    labels = [f'{v1}\n{v2}\n{v3}' for v1, v2, v3
              in zip(group_names, group_counts, group_percentages)]
    labels = np.asarray(labels).reshape(2, 2)
    sns.heatmap(cnf, annot=labels, fmt='', cmap='Blues', annot_kws={'size': 16})

def plot_feature_importances(X, model):
    """Horizontal bar chart of model.feature_importances_ (X must be a DataFrame)."""
    n_features = X.shape[1]
    plt.figure(figsize=(8, 8))
    plt.barh(range(n_features), model.feature_importances_, align='center')
    plt.yticks(np.arange(n_features), X.columns.values)
    plt.xlabel('Feature Importance')
    plt.ylabel('Feature')
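
# Minimal end-to-end sketch (an assumption for illustration, not part of the
# original module): wires the helpers above to a RandomForestClassifier on
# synthetic data. plot_feature_importances reads X.columns, so the features
# are wrapped in a pandas DataFrame here.
if __name__ == '__main__':
    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=500, n_features=8, random_state=42)
    X = pd.DataFrame(X, columns=[f'feat_{i}' for i in range(X.shape[1])])
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, random_state=42)

    model = RandomForestClassifier(random_state=42).fit(X_tr, y_tr)
    get_metrics(X_tr, y_tr, X_val, y_val,
                model.predict(X_tr), model.predict(X_val), model)
    plot_feature_importances(X, model)
    plt.show()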