From a625940bb7f44ce7ffb2353ba9bfbd93b622953c Mon Sep 17 00:00:00 2001 From: a-hartens Date: Wed, 20 Nov 2024 11:54:17 +0100 Subject: [PATCH] functioning sex and continuous coverage phenotypes and tests --- mkdocs.yml | 1 + phenex/codelists/codelists.py | 3 +- phenex/filters/aggregator.py | 7 +- phenex/filters/categorical_filter.py | 9 +- phenex/filters/relative_time_range_filter.py | 40 ++-- phenex/mappers.py | 33 ++- phenex/phenotypes/categorical_phenotype.py | 9 +- .../continuous_coverage_phenotype.py | 189 ++++++++++-------- phenex/phenotypes/death_phenotype.py | 13 +- phenex/phenotypes/sex_phenotype.py | 17 +- phenex/test/phenotype_test_generator.py | 3 +- .../test_continuous_coverage_phenotype.py | 43 ++-- .../test/phenotypes/test_death_phenotype.py | 68 ++----- phenex/test/phenotypes/test_sex_phenotype.py | 8 +- 14 files changed, 235 insertions(+), 208 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index b23d0ab..3da90f3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,6 +20,7 @@ nav: - AgePhenotype: api/phenotypes/age_phenotype.md - SexPhenotype: api/phenotypes/sec_phenotype.md - DeathPhenotype: api/phenotypes/death_phenotype.md + - ContinuousCoveragePhenotype: api/phenotypes/continuous_coverage_phenotype.md - AgePhenotype: api/phenotypes/age_phenotype.md - ArithmeticPhenotype: api/phenotypes/arithmetic_phenotype.md - LogicPhenotype: api/phenotypes/logic_phenotype.md diff --git a/phenex/codelists/codelists.py b/phenex/codelists/codelists.py index 705a9ed..748b660 100644 --- a/phenex/codelists/codelists.py +++ b/phenex/codelists/codelists.py @@ -156,11 +156,10 @@ def to_pandas(self) -> pd.DataFrame: """ _df = pd.DataFrame(self.to_tuples(), columns=["code_type", "code"]) - _df['codelist'] = self.name + _df["codelist"] = self.name return _df - class LocalCSVCodelistFactory: """ """ diff --git a/phenex/filters/aggregator.py b/phenex/filters/aggregator.py index 2ca75d0..d6f88c3 100644 --- a/phenex/filters/aggregator.py +++ b/phenex/filters/aggregator.py @@ 
-8,7 +8,7 @@ def __init__( aggregation_index=["PERSON_ID"], aggregation_function="sum", event_date_column="EVENT_DATE", - reduce=False + reduce=False, ): self.aggregation_index = aggregation_index self.aggregation_function = aggregation_function @@ -40,7 +40,9 @@ def aggregate(self, input_table: Table): input_table = input_table.mutate(aggregated_date=aggregated_date) # Filter rows where the original date matches the aggregated date - input_table = input_table.filter(input_table[self.event_date_column] == input_table.aggregated_date) + input_table = input_table.filter( + input_table[self.event_date_column] == input_table.aggregated_date + ) # Select the necessary columns @@ -52,6 +54,7 @@ def aggregate(self, input_table: Table): return input_table + class Nearest(VerticalDateAggregator): def __init__(self, **kwargs): super().__init__(aggregation_function="max", **kwargs) diff --git a/phenex/filters/categorical_filter.py b/phenex/filters/categorical_filter.py index f44039a..a92eda2 100644 --- a/phenex/filters/categorical_filter.py +++ b/phenex/filters/categorical_filter.py @@ -2,6 +2,7 @@ from typing import List, Optional, Union from ibis.expr.types.relations import Table + class CategoricalFilter(Filter): """ This class filters events in an EventTable based on specified categorical values @@ -19,10 +20,10 @@ class CategoricalFilter(Filter): """ def __init__( - self, - column_name: str, - allowed_values: List[Union[str, int]], - domain: Optional[str] = None + self, + column_name: str, + allowed_values: List[Union[str, int]], + domain: Optional[str] = None, ): self.column_name = column_name self.allowed_values = allowed_values diff --git a/phenex/filters/relative_time_range_filter.py b/phenex/filters/relative_time_range_filter.py index 4007573..7b1e98e 100644 --- a/phenex/filters/relative_time_range_filter.py +++ b/phenex/filters/relative_time_range_filter.py @@ -56,24 +56,7 @@ def __init__( when: Optional[str] = "before", anchor_phenotype: "Phenotype" = None, ): - 
if min_days is not None: - assert min_days.operator in [ - ">", - ">=", - ], f"min_days operator must be > or >=, not {min_days.operator}" - if max_days is not None: - assert max_days.operator in [ - "<", - "<=", - ], f"max_days operator must be > or >=, not {max_days.operator}" - if max_days is not None and min_days is not None: - assert ( - min_days.value <= max_days.value - ), f"min_days must be less than or equal to max_days" - assert when in [ - "before", - "after", - ], f"when must be 'before' or 'after', not {when}" + verify_relative_time_range_filter_input(min_days, max_days, when) self.min_days = min_days self.max_days = max_days @@ -125,3 +108,24 @@ def _filter(self, table: EventTable): table = table.filter(conditions) return table + + +def verify_relative_time_range_filter_input(min_days, max_days, when): + if min_days is not None: + assert min_days.operator in [ + ">", + ">=", + ], f"min_days operator must be > or >=, not {min_days.operator}" + if max_days is not None: + assert max_days.operator in [ + "<", + "<=", + ], f"max_days operator must be > or >=, not {max_days.operator}" + if max_days is not None and min_days is not None: + assert ( + min_days.value <= max_days.value + ), f"min_days must be less than or equal to max_days" + assert when in [ + "before", + "after", + ], f"when must be 'before' or 'after', not {when}" diff --git a/phenex/mappers.py b/phenex/mappers.py index db7314a..7e8aeca 100644 --- a/phenex/mappers.py +++ b/phenex/mappers.py @@ -58,7 +58,13 @@ def rename(self, table: Table) -> Table: mapping = copy.deepcopy(asdict(self)) mapping.pop("NAME_TABLE") # delete optional params from mapping - for key in ["DATE_OF_BIRTH", "DATE_OF_DEATH", "YEAR_OF_BIRTH", "SEX", "ETHNICITY"]: + for key in [ + "DATE_OF_BIRTH", + "DATE_OF_DEATH", + "YEAR_OF_BIRTH", + "SEX", + "ETHNICITY", + ]: if getattr(self, key) is None: del mapping[key] return table.rename(**mapping) @@ -118,6 +124,7 @@ class MeasurementTableColumnMapper(CodeTableColumnMapper): 
VALUE: str = "VALUE" + @dataclass class ObservationPeriodTableMapper: NAME_TABLE: str = "OBSERVATION_PERIOD" @@ -139,26 +146,30 @@ def rename(self, table: Table) -> Table: mapping.pop("NAME_TABLE") return table.rename(**mapping) + # # OMOP Column Mappers # OMOPPersonTableColumnMapper = PersonTableColumnMapper( - NAME_TABLE="PERSON", PERSON_ID="PERSON_ID", + NAME_TABLE="PERSON", + PERSON_ID="PERSON_ID", DATE_OF_BIRTH="BIRTH_DATETIME", YEAR_OF_BIRTH="YEAR_OF_BIRTH", - SEX="GENDER_CONCEPT_ID", ETHNICITY="ETHNICITY_CONCEPT_ID" + SEX="GENDER_CONCEPT_ID", + ETHNICITY="ETHNICITY_CONCEPT_ID", ) OMOPDeathTableColumnMapper = PersonTableColumnMapper( - NAME_TABLE="DEATH", PERSON_ID="PERSON_ID", - DATE_OF_DEATH="DEATH_DATE" + NAME_TABLE="DEATH", PERSON_ID="PERSON_ID", DATE_OF_DEATH="DEATH_DATE" ) OMOPPersonTableSourceColumnMapper = PersonTableColumnMapper( - NAME_TABLE="PERSON", PERSON_ID="PERSON_ID", + NAME_TABLE="PERSON", + PERSON_ID="PERSON_ID", DATE_OF_BIRTH="BIRTH_DATETIME", YEAR_OF_BIRTH="YEAR_OF_BIRTH", - SEX="GENDER_SOURCE_VALUE", ETHNICITY="ETHNICITY_SOURCE_VALUE" + SEX="GENDER_SOURCE_VALUE", + ETHNICITY="ETHNICITY_SOURCE_VALUE", ) OMOPConditionOccurrenceColumnMapper = CodeTableColumnMapper( @@ -223,12 +234,14 @@ def rename(self, table: Table) -> Table: OMOPDomains = DomainsDictionary(**OMOPColumnMappers) - # # Vera Column Mappers # VeraPersonTableColumnMapper = PersonTableColumnMapper( - NAME_TABLE="PERSON", PERSON_ID="PERSON_ID", DATE_OF_BIRTH="BIRTH_DATETIME", DATE_OF_DEATH="DEATH_DATETIME" + NAME_TABLE="PERSON", + PERSON_ID="PERSON_ID", + DATE_OF_BIRTH="BIRTH_DATETIME", + DATE_OF_DEATH="DEATH_DATETIME", ) VeraConditionOccurrenceColumnMapper = CodeTableColumnMapper( @@ -268,4 +281,4 @@ def rename(self, table: Table) -> Table: # # Domains # -VeraDomains = DomainsDictionary(**VeraColumnMappers) \ No newline at end of file +VeraDomains = DomainsDictionary(**VeraColumnMappers) diff --git a/phenex/phenotypes/categorical_phenotype.py 
b/phenex/phenotypes/categorical_phenotype.py index fcbc3ae..d74f625 100644 --- a/phenex/phenotypes/categorical_phenotype.py +++ b/phenex/phenotypes/categorical_phenotype.py @@ -29,12 +29,13 @@ class HospitalizationPhenotype(Phenotype): _execute(tables: Dict[str, Table]) -> PhenotypeTable: Executes the filtering process on the provided tables and returns the filtered phenotype table. """ + def __init__( self, domain, column_name: str, allowed_values: List[str], - name = None, + name=None, date_range: DateRangeFilter = None, relative_time_range: Union[ RelativeTimeRangeFilter, List[RelativeTimeRangeFilter] @@ -43,7 +44,9 @@ def __init__( ): super(HospitalizationPhenotype, self).__init__() - self.categorical_filter = CategoricalFilter(column_name=column_name, allowed_values=allowed_values) + self.categorical_filter = CategoricalFilter( + column_name=column_name, allowed_values=allowed_values + ) self.name = name self.date_range = date_range self.return_date = return_date @@ -93,4 +96,4 @@ def _perform_date_selection(self, code_table): aggregator = Last() else: raise ValueError(f"Unknown return_date: {self.return_date}") - return aggregator.aggregate(code_table) \ No newline at end of file + return aggregator.aggregate(code_table) diff --git a/phenex/phenotypes/continuous_coverage_phenotype.py b/phenex/phenotypes/continuous_coverage_phenotype.py index b832ebf..d9e0c24 100644 --- a/phenex/phenotypes/continuous_coverage_phenotype.py +++ b/phenex/phenotypes/continuous_coverage_phenotype.py @@ -1,8 +1,12 @@ from typing import Union, List, Dict, Optional +from phenex.mappers import ObservationPeriodTableMapper from phenex.phenotypes.phenotype import Phenotype -from phenex.filters.value import Value +from phenex.filters.value import GreaterThanOrEqualTo, Value from phenex.filters.codelist_filter import CodelistFilter -from phenex.filters.relative_time_range_filter import RelativeTimeRangeFilter +from phenex.filters.relative_time_range_filter import ( + 
verify_relative_time_range_filter_input, + RelativeTimeRangeFilter, +) from phenex.filters.date_range_filter import DateRangeFilter from phenex.filters.aggregator import First, Last from phenex.codelists import Codelist @@ -15,93 +19,112 @@ class ContinuousCoveragePhenotype(Phenotype): """ - A phenotype based on continuous coverage within an observation period. - - This class helps generate SQL queries to filter a population based on - continuous coverage criteria within the observation period. - - :param domain: The domain of the phenotype, default is 'observation_period'. The domain - key is used at runtime to determine which table to run on. - :param coverage_period_min: The minimum coverage period for the phenotype with a default - of 0 days. The operator must be '>=' or '>'. - :param return_date: An optional return date for the phenotype result. Possible values are - "first" and "last", where "first" is the beginning of the coverage period containing - the index date and "last" in the end of the coverage period containing the index date. - - Example usage: Find all patients with at least 90 days of continuous coverage - -------------- - >>> coverage_min_filter = ValueFilter(">=", 90) - >>> phenotype = ContinuousCoveragePhenotype(coverage_period_min=coverage_min_filter) + ContinuousCoveragePhenotype identifies patients based on duration of observation data. ContinuousCoveragePhenotype requires an anchor phenotype, typically the entry criterion. It then identifies an observation time period that contains the anchor phenotype. The phenotype can then be used to identify patients with a user specified continuous coverage before or after the anchor phenotype. + + There are two primary use cases for ContinuousCoveragePhenotype: + 1. Identify patients with some minimum duration of coverage prior to anchor_phenotype date e.g. "identify patients with 1 year of continuous coverage prior to index date" + 2. Determine the date of loss to followup (right censoring) i.e. 
the duration of coverage after the anchor_phenotype event + + ## Data for ContinuousCoveragePhenotype + This phenotype requires a table with PersonID and a coverage start date and end date. Depending on the datasource used, this information is either stored in a separate ObservationPeriod table or found in the PersonTable. Use an ObservationPeriodTableMapper to map required coverage start and end date columns. + + | PersonID | coverageStartDate | coverageEndDate | + |-------------|----------------------|--------------------| + | 1 | 2009-01-01 | 2010-01-01 | + | 2 | 2008-01-01 | 2010-01-02 | + + One assumption that is made by ContinuousCoveragePhenotype is that there are **NO overlapping coverage periods**. + """ - def __init__(self, - name:Optional[str] = 'continuous_coverage', - domain:Optional[str] = 'OBSERVATION_PERIOD', - relative_time_range:Optional[RelativeTimeRangeFilter] = None, - min_days : Optional[Value] = None, - anchor_phenotype:Optional[Phenotype] = None, + def __init__( + self, + name: Optional[str] = "continuous_coverage", + domain: Optional[str] = "OBSERVATION_PERIOD", + min_days: Optional[Value] = None, + max_days: Optional[Value] = None, + when: Optional[str] = "before", + anchor_phenotype: Optional[Phenotype] = None, ): + """ + Parameters: + name: The name of the phenotype. Default is 'continuous_coverage'. + domain: The domain of the phenotype. Default is 'OBSERVATION_PERIOD'. + min_days: The minimum number of days of continuous coverage. The operator must be '>=' or '>'. + max_days: The maximum number of days of continuous coverage. The operator must be '<=' or '<'. + when: Either 'before' or 'after'. If 'before', the return date is the start of the coverage period containing the anchor_phenotype. If 'after', the return date is the end of the coverage period containing the anchor_phenotype.
+ Example : + ```python + + # make sure to create an entry phenotype, for example 'atrial fibrillation diagnosis' + entry_phenotype = CodelistPhenotype(...) + + # one year continuous coverage prior to index + one_year_coverage = ContinuousCoveragePhenotype( + when = 'before', + min_days = GreaterThanOrEqualTo(365), + anchor_phenotype = entry_phenotype + ) + + # determine the date of loss to followup + loss_to_followup = ContinuousCoveragePhenotype( + when = 'after', + anchor_phenotype = entry_phenotype + ) + ``` + """ super().__init__() self.name = name self.domain = domain - self.relative_time_range = relative_time_range + verify_relative_time_range_filter_input(min_days, max_days, when) self.min_days = min_days + self.max_days = max_days + self.when = when + self.anchor_phenotype = anchor_phenotype + if self.anchor_phenotype is not None: + self.children.append(self.anchor_phenotype) def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable: - coverage_table = tables[self.domain] - # first perform time range filter on observation period start date - coverage_table = coverage_table.mutate(EVENT_DATE = coverage_table.OBSERVATION_PERIOD_START_DATE) - coverage_table = self._perform_time_filtering(coverage_table) - # ensure that coverage end extends past the anchor date - coverage_table = self._filter_observation_period_end(coverage_table) - coverage_table = self._filter_coverage_period(coverage_table) - - coverage_table = coverage_table.mutate(EVENT_DATE = ibis.null()) - return coverage_table - - def _perform_time_filtering(self, coverage_table): - ''' - Filter the observation period start - ''' - if self.relative_time_range is not None: - coverage_table = self.relative_time_range.filter(coverage_table) - return coverage_table - - def _filter_observation_period_end(self, coverage_table): - ''' - Get only rows where the observation period end date is after the anchor date - ''' - if self.relative_time_range is not None: - if 
self.relative_time_range.anchor_phenotype is not None: - reference_column = self.relative_time_range.anchor_phenotype.table.EVENT_DATE - else: - reference_column = coverage_table.INDEX_DATE - - coverage_table = coverage_table.filter( - coverage_table.OBSERVATION_PERIOD_END_DATE >= reference_column - ) - return coverage_table - - - def _filter_coverage_period(self, coverage_table: Table) -> Table: - if self.min_days.operator == '>': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] > self.min_days.value) - ) - elif self.min_days.operator == '>=': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] >= self.min_days.value) - ) - elif self.min_days.operator == '<': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] < self.min_days.value) - ) - elif self.min_days.operator == '<=': - coverage_table = coverage_table.filter( - (coverage_table['DAYS_FROM_ANCHOR'] <= self.min_days.value) - ) - return coverage_table - - - def get_codelists(self): - return [] + table = tables[self.domain] + + # set time range filters depending on the when parameter + if self.when == "before": + min_days_for_trf_prior = self.min_days + max_days_for_trf_prior = self.max_days + + min_days_for_trf_post = GreaterThanOrEqualTo(0) + max_days_for_trf_post = None + else: + min_days_for_trf_prior = GreaterThanOrEqualTo(0) + max_days_for_trf_prior = None + + min_days_for_trf_post = self.min_days + max_days_for_trf_post = self.max_days + + # Ensure that the observation period start date is before the anchor date by defined time range + table = table.mutate(EVENT_DATE=table.OBSERVATION_PERIOD_START_DATE) + trf_prior = RelativeTimeRangeFilter( + min_days=min_days_for_trf_prior, + max_days=max_days_for_trf_prior, + when="before", + anchor_phenotype=self.anchor_phenotype, + ) + table = trf_prior.filter(table) + + # Ensure that end date is after the anchor date + table = 
table.mutate(EVENT_DATE=table.OBSERVATION_PERIOD_END_DATE) + trf_post = RelativeTimeRangeFilter( + min_days=min_days_for_trf_post, + max_days=max_days_for_trf_post, + when="after", + anchor_phenotype=self.anchor_phenotype, + ) + table = trf_post.filter(table) + + if self.when == "before": + table = table.mutate(EVENT_DATE=table.OBSERVATION_PERIOD_START_DATE) + else: + table = table.mutate(EVENT_DATE=table.OBSERVATION_PERIOD_END_DATE) + + table = table.mutate(VALUE=ibis.null()) + return table diff --git a/phenex/phenotypes/death_phenotype.py b/phenex/phenotypes/death_phenotype.py index a4a2fed..a8020b2 100644 --- a/phenex/phenotypes/death_phenotype.py +++ b/phenex/phenotypes/death_phenotype.py @@ -3,7 +3,8 @@ from ibis.expr.types.relations import Table from phenex.phenotypes.phenotype import Phenotype from phenex.tables import PhenotypeTable, is_phenex_person_table -import ibis +import ibis + class DeathPhenotype(Phenotype): """ @@ -20,10 +21,14 @@ class DeathPhenotype(Phenotype): Executes the phenotype calculation and returns a table with the filtered individuals. 
""" - def __init__(self, name: str = "death", domain: str = "PERSON", + def __init__( + self, + name: str = "death", + domain: str = "PERSON", relative_time_range: Union[ RelativeTimeRangeFilter, List[RelativeTimeRangeFilter] - ] = None): + ] = None, + ): self.name = name self.domain = domain self.children = [] @@ -46,4 +51,4 @@ def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable: for rtr in self.relative_time_range: death_table = rtr.filter(death_table) death_table = death_table.mutate(VALUE=ibis.null()) - return death_table.mutate(EVENT_DATE=death_table.DATE_OF_DEATH) \ No newline at end of file + return death_table.mutate(EVENT_DATE=death_table.DATE_OF_DEATH) diff --git a/phenex/phenotypes/sex_phenotype.py b/phenex/phenotypes/sex_phenotype.py index 77b1204..cd97288 100644 --- a/phenex/phenotypes/sex_phenotype.py +++ b/phenex/phenotypes/sex_phenotype.py @@ -1,14 +1,14 @@ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import ibis from ibis.expr.types.relations import Table from phenex.phenotypes.phenotype import Phenotype from phenex.filters.categorical_filter import CategoricalFilter from phenex.tables import PhenotypeTable, is_phenex_person_table + class SexPhenotype(Phenotype): """ - SexPhenotype is a class that represents a sex-based phenotype. It filters individuals - based on their sex (e.g., male, female) using the CategoricalFilter. + SexPhenotype is a class that represents a sex-based phenotype. It is able to identify the sex of individuals and filter them based on identified sex. Attributes: name (str): Name of the phenotype, default is 'sex'. 
@@ -24,7 +24,7 @@ class SexPhenotype(Phenotype): def __init__( self, name: str = "sex", - allowed_values: List[str] = ["male", "female"], + allowed_values: Optional[List[Union[str, int, float]]] = ["male", "female"], domain: str = "PERSON", ): self.name = name @@ -37,7 +37,10 @@ def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable: person_table = tables[self.domain] assert is_phenex_person_table(person_table) - sex_filter = CategoricalFilter(column_name="SEX", allowed_values=self.allowed_values) - filtered_table = sex_filter._filter(person_table) + if self.allowed_values is not None: + sex_filter = CategoricalFilter( + column_name="SEX", allowed_values=self.allowed_values + ) + person_table = sex_filter._filter(person_table) - return filtered_table.mutate(VALUE=filtered_table.SEX, EVENT_DATE= ibis.null()) + return person_table.mutate(VALUE=person_table.SEX, EVENT_DATE=ibis.null()) diff --git a/phenex/test/phenotype_test_generator.py b/phenex/test/phenotype_test_generator.py index d846f53..29faf5b 100644 --- a/phenex/test/phenotype_test_generator.py +++ b/phenex/test/phenotype_test_generator.py @@ -21,6 +21,7 @@ class PhenotypeTestGenerator: name_space = "" date_format = "%m-%d-%Y" test_values = False + value_datatype = float test_date = False join_on = ["PERSON_ID"] @@ -139,7 +140,7 @@ def df_from_test_info(test_info): if "date" in col.lower(): schema[col] = datetime.date elif "value" in col.lower(): - schema[col] = float + schema[col] = self.value_datatype elif "boolean" in col.lower(): schema[col] = bool else: diff --git a/phenex/test/phenotypes/test_continuous_coverage_phenotype.py b/phenex/test/phenotypes/test_continuous_coverage_phenotype.py index df7b62a..166caa3 100644 --- a/phenex/test/phenotypes/test_continuous_coverage_phenotype.py +++ b/phenex/test/phenotypes/test_continuous_coverage_phenotype.py @@ -10,13 +10,12 @@ from phenex.filters.value import * - class ContinuousCoveragePhenotypeTestGenerator(PhenotypeTestGenerator): - name_space = 
"continuouscoverage" + name_space = "ccpt" def define_input_tables(self): oneday = datetime.timedelta(days=1) - index_date = datetime.datetime.strptime("01-01-2022", "%m-%d-%Y") + index_date = datetime.date(2022, 1, 1) observation_period_min = 90 * oneday possible_start_dates = [ @@ -45,17 +44,20 @@ def define_input_tables(self): N = len(end_dates) df_observation_period = pd.DataFrame() - df_observation_period["PERSON_ID"] = [ - f"P{x}" for x in list(range(N)) - ] + df_observation_period["PERSON_ID"] = [f"P{x}" for x in list(range(N))] df_observation_period["INDEX_DATE"] = index_date - df_observation_period["observation_period_start_date"] = start_dates - df_observation_period["observation_period_end_date"] = end_dates + df_observation_period["OBSERVATION_PERIOD_START_DATE"] = start_dates + df_observation_period["OBSERVATION_PERIOD_END_DATE"] = end_dates + df_observation_period["start_from_end"] = [ + x - y for x, y in zip(end_dates, start_dates) + ] + df_observation_period["start_from_index"] = [index_date - x for x in end_dates] + df_observation_period["end_from_index"] = [index_date - x for x in start_dates] self.df_input = df_observation_period input_info_observation_period = { - "name": "observation_period", + "name": "OBSERVATION_PERIOD", "df": df_observation_period, } @@ -77,10 +79,8 @@ def define_phenotype_tests(self): for test_info in test_infos: test_info["phenotype"] = ContinuousCoveragePhenotype( name=test_info["name"], - domain="observation_period", - coverage_period_min=test_info.get("coverage_period_min"), + min_days=test_info.get("coverage_period_min"), ) - test_info["refactor"] = True # TODO remove once refactored return test_infos @@ -89,9 +89,10 @@ class ContinuousCoverageReturnLastPhenotypeTestGenerator( ContinuousCoveragePhenotypeTestGenerator ): name_space = "ccpt_returnlast" + test_date = True def define_phenotype_tests(self): - persons = ["P7", "P10", "P11", "P12", "P14", "P15"] + persons = ["P15", "P19", "P20", "P22", "P23"] t1 = { "name": 
"coverage_min_geq_90", @@ -99,19 +100,19 @@ def define_phenotype_tests(self): "persons": persons, "dates": list( self.df_input[self.df_input["PERSON_ID"].isin(persons)][ - "observation_period_end_date" + "OBSERVATION_PERIOD_END_DATE" ].values ), } - persons = ["P7", "P10", "P11"] + persons = ["P19", "P22", "P23"] t2 = { "name": "coverage_min_gt_90", "coverage_period_min": Value(value=90, operator=">"), - "persons": ["P7", "P10", "P11"], + "persons": persons, "dates": list( self.df_input[self.df_input["PERSON_ID"].isin(persons)][ - "observation_period_end_date" + "OBSERVATION_PERIOD_END_DATE" ].values ), } @@ -120,9 +121,8 @@ def define_phenotype_tests(self): for test_info in test_infos: test_info["phenotype"] = ContinuousCoveragePhenotype( name=test_info["name"], - domain="observation_period", - return_date="last", - coverage_period_min=test_info.get("coverage_period_min"), + min_days=test_info.get("coverage_period_min"), + when="after", ) test_info["column_types"] = {f"{test_info['name']}_date": "date"} @@ -133,9 +133,12 @@ def test_continuous_coverage_phenotypes(): spg = ContinuousCoveragePhenotypeTestGenerator() spg.run_tests() + +def test_continuous_coverage_return_last(): spg = ContinuousCoverageReturnLastPhenotypeTestGenerator() spg.run_tests() if __name__ == "__main__": test_continuous_coverage_phenotypes() + test_continuous_coverage_return_last() diff --git a/phenex/test/phenotypes/test_death_phenotype.py b/phenex/test/phenotypes/test_death_phenotype.py index 4550404..d082d6f 100644 --- a/phenex/test/phenotypes/test_death_phenotype.py +++ b/phenex/test/phenotypes/test_death_phenotype.py @@ -44,15 +44,11 @@ def define_phenotype_tests(self): idx_persons = [1, 2, 3, 4, 5] t1 = { "name": "death_prior_including_index", - "time_range_filter": RelativeTimeRangeFilter( - when="before" - ), + "time_range_filter": RelativeTimeRangeFilter(when="before"), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - 
self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -61,15 +57,12 @@ def define_phenotype_tests(self): t2 = { "name": "death_prior_index", "time_range_filter": RelativeTimeRangeFilter( - when="before", - min_days=GreaterThan(0) + when="before", min_days=GreaterThan(0) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -78,15 +71,12 @@ def define_phenotype_tests(self): t3 = { "name": "death_prior_including_index_max20", "time_range_filter": RelativeTimeRangeFilter( - when="before", - max_days=Value("<=", 30) + when="before", max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -95,16 +85,12 @@ def define_phenotype_tests(self): t4 = { "name": "death_prior_index_max_20", "time_range_filter": RelativeTimeRangeFilter( - when="before", - min_days=GreaterThan(0), - max_days=Value("<=", 30) + when="before", min_days=GreaterThan(0), max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -112,15 +98,11 @@ def define_phenotype_tests(self): idx_persons = [1, 5, 6, 7, 8] t5 = { "name": "death_post_including_index", - "time_range_filter": RelativeTimeRangeFilter( - when="after" - ), + "time_range_filter": RelativeTimeRangeFilter(when="after"), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i 
in idx_persons ], } @@ -129,15 +111,12 @@ def define_phenotype_tests(self): t6 = { "name": "death_post_index", "time_range_filter": RelativeTimeRangeFilter( - when="after", - min_days=GreaterThan(0) + when="after", min_days=GreaterThan(0) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -146,15 +125,12 @@ def define_phenotype_tests(self): t7 = { "name": "death_post_including_index_max20", "time_range_filter": RelativeTimeRangeFilter( - when="after", - max_days=Value("<=", 30) + when="after", max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -162,16 +138,12 @@ def define_phenotype_tests(self): t8 = { "name": "death_post_index_max_20", "time_range_filter": RelativeTimeRangeFilter( - when="after", - min_days=GreaterThan(0), - max_days=Value("<=", 30) + when="after", min_days=GreaterThan(0), max_days=Value("<=", 30) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } @@ -179,16 +151,12 @@ def define_phenotype_tests(self): t9 = { "name": "death_post_min_30_max_50", "time_range_filter": RelativeTimeRangeFilter( - when="after", - min_days=Value(">", 30), - max_days=Value("<=", 50) + when="after", min_days=Value(">", 30), max_days=Value("<=", 50) ), "persons": [f"P{x}" for x in idx_persons], "dates": [ x - for i, x in enumerate( - self.input_table["DATE_OF_DEATH"].values - ) + for i, x in enumerate(self.input_table["DATE_OF_DEATH"].values) if i in idx_persons ], } diff --git a/phenex/test/phenotypes/test_sex_phenotype.py 
b/phenex/test/phenotypes/test_sex_phenotype.py index 385d089..b2ef81c 100644 --- a/phenex/test/phenotypes/test_sex_phenotype.py +++ b/phenex/test/phenotypes/test_sex_phenotype.py @@ -9,8 +9,10 @@ from phenex.test.phenotype_test_generator import PhenotypeTestGenerator from phenex.filters.value import * + class SexPhenotypeTestGenerator(PhenotypeTestGenerator): - name_space = "sex_phenotype" + name_space = "sxpt" + value_datatype = str def define_input_tables(self): self.n_persons = 6 @@ -27,11 +29,9 @@ def define_input_tables(self): "Unknown", ] - column_types_person = {} input_info_person = { - "name": "person", + "name": "PERSON", "df": df_person, - "column_types": column_types_person, } return [input_info_person]