From 72fa263a682c5e7cd0acdeec3c49b5baf922d05b Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Sun, 25 Aug 2024 07:34:10 -0400 Subject: [PATCH 1/3] remove files if they are not in the set of mimic tables --- mimic-iv/buildmimic/sqlite/import.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mimic-iv/buildmimic/sqlite/import.py b/mimic-iv/buildmimic/sqlite/import.py index f99abd21..6a4d52dd 100644 --- a/mimic-iv/buildmimic/sqlite/import.py +++ b/mimic-iv/buildmimic/sqlite/import.py @@ -96,6 +96,11 @@ def main(): print(f"Missing tables: {missing_tables}") sys.exit() + # subselect to only tables in the above list + data_files = [f for f, t in zip(data_files, tablenames) if t in _MIMIC_TABLES] + tablenames = [t for t in tablenames if t in _MIMIC_TABLES] + print(f"Importing {len(tablenames)} files.") + pt = None subjects = None if args.limit > 0: From 2cd33602afb7b86e77244f97d34f3be37883b295 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Sun, 25 Aug 2024 07:39:44 -0400 Subject: [PATCH 2/3] remove specifying format of date --- mimic-iv/buildmimic/sqlite/import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mimic-iv/buildmimic/sqlite/import.py b/mimic-iv/buildmimic/sqlite/import.py index 6a4d52dd..bf93e251 100644 --- a/mimic-iv/buildmimic/sqlite/import.py +++ b/mimic-iv/buildmimic/sqlite/import.py @@ -50,7 +50,7 @@ def process_dataframe(df: pd.DataFrame, subjects: t.Optional[t.List[int]] = None) -> pd.DataFrame: for c in df.columns: if c.endswith('time') or c.endswith('date'): - df[c] = pd.to_datetime(df[c], format='ISO8601') + df[c] = pd.to_datetime(df[c]) if subjects is not None and 'subject_id' in df: df = df.loc[df['subject_id'].isin(subjects)] From f925c03031b79ddde5b87b23ff04945d44e015a3 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Sun, 25 Aug 2024 07:42:06 -0400 Subject: [PATCH 3/3] print files which are ignored --- mimic-iv/buildmimic/sqlite/import.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mimic-iv/buildmimic/sqlite/import.py b/mimic-iv/buildmimic/sqlite/import.py index bf93e251..02b4f0b3 100644 --- a/mimic-iv/buildmimic/sqlite/import.py +++ b/mimic-iv/buildmimic/sqlite/import.py @@ -97,10 +97,14 @@ def main(): sys.exit() # subselect to only tables in the above list + ignored_files = set([f for f, t in zip(data_files, tablenames) if t not in _MIMIC_TABLES]) data_files = [f for f, t in zip(data_files, tablenames) if t in _MIMIC_TABLES] tablenames = [t for t in tablenames if t in _MIMIC_TABLES] print(f"Importing {len(tablenames)} files.") + if ignored_files: + print(f"Ignoring {len(ignored_files)} files: {ignored_files}") + pt = None subjects = None if args.limit > 0: