-
Notifications
You must be signed in to change notification settings - Fork 0
/
score_submission.py
31 lines (24 loc) · 1.24 KB
/
score_submission.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
import numpy as np
import sklearn.metrics as metrics
# Vital-sign regression targets (scored in get_score's task3 via a
# clipped R^2: 0.5 + 0.5 * max(0, r2)).
VITALS = ['LABEL_RRate', 'LABEL_ABPm', 'LABEL_SpO2', 'LABEL_Heartrate']
# Binary medical-test labels (scored in get_score's task1 via ROC-AUC,
# averaged over all entries).
TESTS = ['LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST', 'LABEL_Alkalinephos', 'LABEL_Bilirubin_total',
         'LABEL_Lactate', 'LABEL_TroponinI', 'LABEL_SaO2',
         'LABEL_Bilirubin_direct', 'LABEL_EtCO2']
def get_score(df_true, df_submission):
    """Return the mean of the three sub-task scores.

    task1: mean ROC-AUC over the TESTS labels.
    task2: ROC-AUC for the 'LABEL_Sepsis' column.
    task3: mean of 0.5 + 0.5 * max(0, R^2) over the VITALS labels.

    Both frames are sorted by 'pid' first so rows are compared
    patient-for-patient regardless of input order.
    """
    df_true = df_true.sort_values('pid')
    df_submission = df_submission.sort_values('pid')

    # Task 1: average ROC-AUC across all ordered-test labels.
    test_aucs = []
    for label in TESTS:
        test_aucs.append(metrics.roc_auc_score(df_true[label], df_submission[label]))
    task1 = np.mean(test_aucs)

    # Task 2: single ROC-AUC for sepsis prediction.
    task2 = metrics.roc_auc_score(df_true['LABEL_Sepsis'], df_submission['LABEL_Sepsis'])

    # Task 3: R^2 per vital sign, clipped at 0 and rescaled into [0.5, 1].
    vital_scores = []
    for label in VITALS:
        r2 = metrics.r2_score(df_true[label], df_submission[label])
        vital_scores.append(0.5 + 0.5 * np.maximum(0, r2))
    task3 = np.mean(vital_scores)

    print(task1, task2, task3)
    return np.mean([task1, task2, task3])
# Self-check: score sample.zip against itself as ground truth; a correct
# metric implementation should yield (near-)perfect scores here.
filename = 'sample.zip'
df_submission = pd.read_csv(filename)
# generate a baseline based on sample.zip
# Copy the already-loaded frame instead of reading the same file a second
# time — identical data, one disk read.
df_true = df_submission.copy()
for label in TESTS + ['LABEL_Sepsis']:
    # round classification labels
    df_true[label] = np.around(df_true[label].values)
print('Score of sample.zip with itself as groundtruth', get_score(df_true, df_submission))