From d03a0792b9ab6ea62d2715b57ce50fcf1a84967c Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Fri, 27 Oct 2023 13:00:33 +0200 Subject: [PATCH] remove patsy dependency --- ncem/tl/fit/backend/design_matrix.py | 23 +++++++++-------------- ncem/tl/fit/backend/linear_model.py | 1 - ncem/tl/fit/backend/testing.py | 1 + pyproject.toml | 3 ++- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/ncem/tl/fit/backend/design_matrix.py b/ncem/tl/fit/backend/design_matrix.py index e9f4c1f8..f1b182f4 100644 --- a/ncem/tl/fit/backend/design_matrix.py +++ b/ncem/tl/fit/backend/design_matrix.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd import patsy +from formulaic import Formula from ncem.tl.fit.constants import PREFIX_INDEX, PREFIX_NEIGHBOR @@ -248,11 +249,9 @@ def get_dmat_from_obs(obs: pd.DataFrame, obs_niche: pd.DataFrame, formula: str, # Merge sample annotation: obs_full = pd.concat([obs, obs_index_type, obs_niche], axis=1) columns_names = formula.replace('~0+','').split('+') - - dmat = patsy.dmatrix(formula, obs_full) - # Simplify names, this is necessary for patsy to accept these as terms later. - column_names = [f"{x.split('[')[0]}{x.split('[')[1].split(']')[-1]}" for x in dmat.design_info.column_names] - dmat = pd.DataFrame(np.asarray(dmat), index=obs.index, columns=column_names) + + dmat = Formula(formula).get_model_matrix(obs_full) + dmat = pd.DataFrame(np.asarray(dmat), index=obs.index, columns=dmat.columns) return dmat @@ -298,13 +297,8 @@ def get_dmats_from_deconvoluted( obs_index_type_x.index = obs.index # Merge sample annotation: obs_full = pd.concat([obs, obs_index_type_x, obs_niche], axis=1) - dmats[x] = patsy.dmatrix(formulas[x], obs_full) - # ensure that column names start with index type name - dmat_columns = [ - col if col.startswith(PREFIX_INDEX) else PREFIX_INDEX + x + col for col in dmats[x].design_info.column_names - ] - dmat_columns = [f"{x.split('[')[0]}{x.split('[')[1].split(']')[-1]}" for x in dmat_columns] - dmats[x] = pd.DataFrame(np.asarray(dmats[x]), index=obs.index, columns=dmat_columns) + dmats[x] = Formula(formulas[x]).get_model_matrix(obs_full) + dmats[x] = pd.DataFrame(np.asarray(dmats[x]), index=obs.index, columns=dmats[x].columns) return dmats @@ -353,7 +347,8 @@ def get_dmat_global_from_deconvoluted(obs: pd.DataFrame, deconv: pd.DataFrame, f obs_index_type[x].index = obs.index # Merge sample annotation: obs_full = pd.concat([obs, obs_index_type_x, obs_niche], axis=1) - dmat_x = patsy.dmatrix(formula, obs_full) - dmats.append(pd.DataFrame(np.asarray(dmat_x), index=obs.index, columns=dmat_x.design_info.column_names)) + + dmat_x = Formula(formula).get_model_matrix(obs_full) + dmats.append(pd.DataFrame(np.asarray(dmat_x), index=obs.index, columns=dmat_x.columns)) dmat = pd.concat(dmats, axis=0) return dmat diff --git a/ncem/tl/fit/backend/linear_model.py b/ncem/tl/fit/backend/linear_model.py index 1fa40c68..6d9aa9f6 100644 --- a/ncem/tl/fit/backend/linear_model.py +++ b/ncem/tl/fit/backend/linear_model.py @@ -274,7 +274,6 @@ def linear_ncem_deconvoluted( ) dmats = get_dmats_from_deconvoluted(deconv=adata.obsm[key_deconvolution], formulas=formulas, obs=adata.obs) for k, v in dmats.items(): - print(k) dmat_key = f"{OBSM_KEY_DMAT}_{k}" adata.obsm[dmat_key] = v params = ols_fit(x_=adata.obsm[dmat_key].values, y_=adata.layers[k]) diff --git a/ncem/tl/fit/backend/testing.py b/ncem/tl/fit/backend/testing.py index ebbfd291..9cfda723 100644 --- a/ncem/tl/fit/backend/testing.py +++ b/ncem/tl/fit/backend/testing.py @@ -150,6 +150,7 @@ def test_deconvoluted( for y in coef_to_test: if y in parameter_names: idx = parameter_names.index(y) + print(idx) theta_mle = params.values[:, idx] theta_sd = fisher_inv[:, idx, idx] theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf)) diff --git a/pyproject.toml b/pyproject.toml index 6d4b703b..e96e04c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,10 +22,11 @@ python = ">=3.8,<=3.10" click = "^8.0.0" rich = "^10.1.0" PyYAML = "^5.4.1" -Jinja2 = ">=2.11.3,<4.0.0" +Jinja2 = ">=2.11.3,<=4.0.0" scanpy = "^1.9.3" squidpy = "^1.2.3" patsy = "^0.5.1" +formulaic = "=0.6.6" scipy = "=1.9.1" seaborn = "^0.12.2" matplotlib = "^3.7.1"