-
Notifications
You must be signed in to change notification settings - Fork 1
/
erisk.py
123 lines (95 loc) · 3.54 KB
/
erisk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pandas as pd
import numpy as np
def erde_evaluation(goldenTruth_path, algorithmResult_path, o):
    """Compute and print ERDE, F1, precision and recall for an eRisk run.

    Parameters
    ----------
    goldenTruth_path : str
        Path to a tab-separated file with columns (subj_id, true_risk).
    algorithmResult_path : str
        Path to a space-separated file with columns
        (subj_id, risk_decision, delay).
    o : int
        ERDE deadline parameter: true-positive decisions made after ``o``
        writings are increasingly penalised.

    Returns
    -------
    float
        The global ERDE score, as a percentage in [0, 100].
    """
    data_golden = pd.read_csv(
        goldenTruth_path, sep="\t", header=None, names=["subj_id", "true_risk"]
    )
    data_result = pd.read_csv(
        algorithmResult_path,
        sep=" ",
        header=None,
        names=["subj_id", "risk_decision", "delay"],
    )
    # Merge tables on 'subj_id' so each row pairs the true risk with the
    # system's decision; left join keeps every gold subject even if the
    # system produced no decision for it.
    merged_data = data_golden.merge(data_result, on="subj_id", how="left")

    # Work on plain arrays: positional indexing is what the loop below
    # intends, and it sidesteps chained-assignment/SettingWithCopy issues
    # that `erde.iat[i] = ...` on a column slice would raise.
    t_risk = merged_data["true_risk"].to_numpy()
    risk_d = merged_data["risk_decision"].to_numpy()
    k = merged_data["delay"].to_numpy()

    P_true = int((t_risk == 1).sum())                 # actual positives
    P_hat = int((risk_d == 1).sum())                  # predicted positives
    TP = int(((t_risk == 1) & (risk_d == 1)).sum())   # true positives
    N = len(merged_data)

    # Per-user ERDE (Losada & Crestani, CLEF 2016). Default 1.0 covers
    # unmatched rows (NaN decision/delay fall through every branch).
    erde = np.full(N, 1.0)
    for i in range(N):
        if risk_d[i] == 1 and t_risk[i] == 0:
            # False positive: cost proportional to positive prevalence.
            erde[i] = float(P_true) / N
        elif risk_d[i] == 0 and t_risk[i] == 1:
            # False negative: maximal cost.
            erde[i] = 1.0
        elif risk_d[i] == 1 and t_risk[i] == 1:
            # True positive: sigmoid latency penalty around delay == o.
            erde[i] = 1.0 - (1.0 / (1.0 + np.exp(k[i] - o)))
        elif risk_d[i] == 0 and t_risk[i] == 0:
            # True negative: no cost.
            erde[i] = 0.0
    merged_data["erde"] = erde

    # Guard the degenerate cases (no predicted/actual positives) that
    # previously raised ZeroDivisionError.
    precision = float(TP) / P_hat if P_hat else 0.0
    recall = float(TP) / P_true if P_true else 0.0
    F1 = (
        2 * (precision * recall) / (precision + recall)
        if (precision + recall)
        else 0.0
    )
    erde_global = merged_data["erde"].mean() * 100

    indiv_erde = merged_data.loc[:, ["subj_id", "erde"]]
    print(indiv_erde.to_string())
    print("Global ERDE (with o = %d): %.2f" % (o, erde_global), "%")
    print("F1: %.2f" % F1)
    print("Precision: %.2f" % precision)
    print("Recall: %.2f" % recall)
    return erde_global
def erde_mem(predictions, labels, delays, order=50):
yy = list(zip(predictions, labels))
P_TRUE = sum(labels)
P_HAT = sum(predictions)
TP = yy.count((1, 1))
N = len(yy)
# https://tec.citius.usc.es/ir/pdf/CLEF16_paper.pdf
erde = list()
for i in range(N):
y_hat, y_true = yy[i]
match (y_hat, y_true):
case (0, 0):
loss = 0
case (1, 0):
loss = P_TRUE / N
case (0, 1):
loss = 1
case (1, 1):
penalty = np.exp(delays[i] - order)
loss = 1 - (1 / (1 + penalty))
erde.append(loss)
# Calculus of F1, Precision, Recall and global ERDE
precision = TP / P_HAT
recall = TP / P_TRUE
F1 = 2 * (precision * recall) / (precision + recall)
erde_global = sum(erde) / len(erde)
print(f"Global ERDE({order}): {erde_global}")
print(f"F1: {F1:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
def metrics(predictions, labels):
    """Compute and print F1, precision, recall and accuracy.

    Parameters
    ----------
    predictions : sequence of int
        Binary decisions (1 = positive), one per subject.
    labels : sequence of int
        Binary gold labels, aligned with ``predictions``.

    Returns
    -------
    dict
        Keys ``"precision"``, ``"recall"``, ``"f1"``, ``"accuracy"``.
    """
    pairs = list(zip(predictions, labels))
    P_TRUE = sum(labels)        # actual positives
    P_HAT = sum(predictions)    # predicted positives
    TP = pairs.count((1, 1))    # true positives
    TN = pairs.count((0, 0))    # true negatives
    N = len(pairs)
    # Guard the degenerate cases (no positives, empty input) that
    # previously raised ZeroDivisionError.
    precision = TP / P_HAT if P_HAT else 0.0
    recall = TP / P_TRUE if P_TRUE else 0.0
    F1 = (
        2 * (precision * recall) / (precision + recall)
        if (precision + recall)
        else 0.0
    )
    accuracy = (TP + TN) / N if N else 0.0
    print(f"F1: {F1:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"Accuracy: {accuracy:.2f}")
    return {
        "precision": precision,
        "recall": recall,
        "f1": F1,
        "accuracy": accuracy,
    }