Skip to content

Commit

Permalink
Merge pull request #618 from amath-idm/update-summary
Browse files Browse the repository at this point in the history
Update summary
  • Loading branch information
cliffckerr authored Sep 27, 2023
2 parents 84929ff + 1548f1c commit 9fe3aed
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 54 deletions.
15 changes: 10 additions & 5 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ All notable changes to the codebase are documented in this file. Changes that ma
:depth: 1


Version 1.2.7 (2023-09-22)
---------------------------
- Updates ``sim.summary`` to have more useful information
- *Github info* PR `618 <https://github.com/amath-idm/hpvsim/pull/618>`__

Version 1.2.6 (2023-09-22)
---------------------------
- Fixes plotting issue with MultiSims and Jupyter notebooks
Expand All @@ -17,27 +22,27 @@ Version 1.2.6 (2023-09-22)

Version 1.2.5 (2023-09-21)
---------------------------
- Fixes file path when run via Jupyter.
- Fixes file path when run via Jupyter
- *Github info* PR `610 <https://github.com/amath-idm/hpvsim/pull/610>`__

Version 1.2.4 (2023-09-19)
---------------------------
- Fixes Matplotlib regression in plotting.
- Fixes Matplotlib regression in plotting
- *Github info* PR `609 <https://github.com/amath-idm/hpvsim/pull/609>`__

Version 1.2.3 (2023-08-30)
---------------------------
- Updates data loading to be much more efficient.
- Updates data loading to be much more efficient
- *Github info* PR `604 <https://github.com/amath-idm/hpvsim/pull/604>`__

Version 1.2.2 (2023-08-11)
---------------------------
- Improved tests and included ``conda`` environment specification.
- Improved tests and included ``conda`` environment specification
- *Github info* PR `598 <https://github.com/amath-idm/hpvsim/pull/598>`__

Version 1.2.1 (2023-07-09)
---------------------------
- Updated data files being used.
- Updated data files being used
- *Github info* PR `586 <https://github.com/amath-idm/hpvsim/pull/586>`__

Version 1.2.0 (2023-05-31)
Expand Down
17 changes: 11 additions & 6 deletions hpvsim/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def result_keys(self, which='all'):
'''
Get the actual results objects, not other things stored in sim.results.
If which is 'main', return only the main results keys. If 'genotype', return
If which is 'total', return only the main results keys. If 'genotype', return
only genotype keys. If 'all', return all keys.
'''
Expand Down Expand Up @@ -568,7 +568,7 @@ def to_json(self, filename=None, keys=None, tostring=False, indent=2, verbose=Fa

# Handle keys
if keys is None:
keys = ['results', 'pars', 'summary']
keys = ['results', 'pars', 'summary', 'short_summary']
keys = sc.promotetolist(keys)

# Convert to JSON-compatible format
Expand All @@ -588,6 +588,11 @@ def to_json(self, filename=None, keys=None, tostring=False, indent=2, verbose=Fa
d['summary'] = dict(sc.dcp(self.summary))
else:
d['summary'] = 'Summary not available (Sim has not yet been run)'
elif key == 'short_summary':
if self.results_ready:
d['short_summary'] = dict(sc.dcp(self.short_summary))
else:
d['short_summary'] = 'Full summary not available (Sim has not yet been run)'
else: # pragma: no cover
try:
d[key] = sc.sanitizejson(getattr(self, key))
Expand All @@ -612,7 +617,7 @@ def to_df(self, date_index=False):
'''
resdict = self.export_results(for_json=False)
resdict = {k:v for k,v in resdict.items() if v.ndim == 1}
df = pd.DataFrame.from_dict(resdict)
df = sc.dataframe.from_dict(resdict)
df['year'] = self.res_yearvec
new_columns = ['t','year'] + df.columns[1:-1].tolist() # Get column order
df = df.reindex(columns=new_columns) # Reorder so 't' and 'date' are first
Expand Down Expand Up @@ -641,7 +646,7 @@ def to_excel(self, filename=None, skip_pars=None):
# Export parameters
pars = {str(k):sc.dcp(v) for k,v in self.pars.items() if k not in skip_pars}
pars['immunity_map'] = {str(k):v for k,v in pars['immunity_map'].items()}
par_df = pd.DataFrame.from_dict(sc.flattendict(pars, sep='_'), orient='index', columns=['Value'])
par_df = sc.dataframe.from_dict(sc.flattendict(pars, sep='_'), orient='index', columns=['Value'])
par_df.index.name = 'Parameter'

# Convert to spreadsheet
Expand Down Expand Up @@ -777,7 +782,7 @@ def _get_ia(self, which, label=None, partial=False, as_list=False, as_inds=False
n_ia = len(ia_list) # Number of interventions/analyzers

if label == 'summary': # Print a summary of the interventions
df = pd.DataFrame(columns=['ind', 'label', 'type'])
df = sc.dataframe(columns=['ind', 'label', 'type'])
for ind,ia_obj in enumerate(ia_list):
df = df.append(dict(ind=ind, label=str(ia_obj.label), type=type(ia_obj)), ignore_index=True)
print(f'Summary of {which}:')
Expand Down Expand Up @@ -1508,7 +1513,7 @@ def indices(self):

def to_df(self):
''' Convert to a Pandas dataframe '''
df = pd.DataFrame.from_dict({key:self[key] for key in self.keys()})
df = sc.dataframe.from_dict({key:self[key] for key in self.keys()})
return df

def to_arr(self):
Expand Down
90 changes: 55 additions & 35 deletions hpvsim/sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,6 @@ def init_genotypes(self, upper_dysp_lim=200):
# Do any precomputations for the genotype transformation functions
t_step = self['dt']
t_sequence = np.arange(0, upper_dysp_lim, t_step)
timesteps = t_sequence / t_step
cumdysp = dict()
for g in range(self['n_genotypes']):
sev_fn = self['genotype_pars'][g]['sev_fn']
Expand Down Expand Up @@ -463,14 +462,14 @@ def init_res(*args, **kwargs):
results[f'n_{stock.name}_by_genotype'] = init_res(stock.label+' by genotype', n_rows=ng)

# Only by-age stock result we will need is number infectious, susceptible, and with cin, for HPV and CIN prevalence/incidence calculations
results[f'n_infectious_by_age'] = init_res('Number infectious by age', n_rows=na, color=stock.color)
results[f'n_females_infectious_by_age'] = init_res('Number of females infectious by age', n_rows=na, color=stock.color)
results[f'n_susceptible_by_age'] = init_res('Number susceptible by age', n_rows=na, color=stock.color)
results[f'n_transformed_by_age'] = init_res('Number transformed by age', n_rows=na, color=stock.color)
results[f'n_precin_by_age'] = init_res('Number Pre-CIN by age', n_rows=na, color=stock.color)
results[f'n_cin1_by_age'] = init_res('Number CIN1 by age', n_rows=na, color=stock.color)
results[f'n_cin2_by_age'] = init_res('Number CIN2 by age', n_rows=na, color=stock.color)
results[f'n_cin3_by_age'] = init_res('Number CIN3 by age', n_rows=na, color=stock.color)
results['n_infectious_by_age'] = init_res('Number infectious by age', n_rows=na, color=stock.color)
results['n_females_infectious_by_age'] = init_res('Number of females infectious by age', n_rows=na, color=stock.color)
results['n_susceptible_by_age'] = init_res('Number susceptible by age', n_rows=na, color=stock.color)
results['n_transformed_by_age'] = init_res('Number transformed by age', n_rows=na, color=stock.color)
results['n_precin_by_age'] = init_res('Number Pre-CIN by age', n_rows=na, color=stock.color)
results['n_cin1_by_age'] = init_res('Number CIN1 by age', n_rows=na, color=stock.color)
results['n_cin2_by_age'] = init_res('Number CIN2 by age', n_rows=na, color=stock.color)
results['n_cin3_by_age'] = init_res('Number CIN3 by age', n_rows=na, color=stock.color)

# Create incidence and prevalence results
for var,name,color in zip(hpd.inci_keys, hpd.inci_names, hpd.inci_colors):
Expand Down Expand Up @@ -758,7 +757,6 @@ def step(self):
dt = self['dt'] # Timestep
t = self.t
ng = self['n_genotypes']
na = len(self.pars['age_bin_edges']) - 1 # Number of age bins
condoms = self['condoms']
eff_condoms = self['eff_condoms']
beta = self['beta']
Expand Down Expand Up @@ -885,20 +883,20 @@ def step(self):
f_inds = hpu.true(people['sex']==0)
infinds = hpu.true(people['infectious'])
f_infinds = np.intersect1d(f_inds, infinds)
self.results[f'n_females_infectious_by_age'][:, idx] = np.histogram(people.age[f_infinds], bins=people.age_bin_edges, weights=people.scale[f_infinds])[0]
self.results[f'n_infectious_by_age'][:, idx] = np.histogram(people.age[infinds], bins=people.age_bin_edges, weights=people.scale[infinds])[0]
susinds = hpu.true(people['susceptible'])
self.results[f'n_susceptible_by_age'][:, idx] = np.histogram(people.age[susinds], bins=people.age_bin_edges, weights=people.scale[susinds])[0]
transformedinds = hpu.true(people['transformed'])
self.results[f'n_transformed_by_age'][:, idx] = np.histogram(people.age[transformedinds], bins=people.age_bin_edges, weights=people.scale[transformedinds])[0]
precininds = hpu.true(people['precin'])
self.results[f'n_precin_by_age'][:, idx] = np.histogram(people.age[precininds], bins=people.age_bin_edges, weights=people.scale[precininds])[0]
cin1inds = hpu.true(people['cin1'])
self.results[f'n_cin1_by_age'][:, idx] = np.histogram(people.age[cin1inds], bins=people.age_bin_edges, weights=people.scale[cin1inds])[0]
cin2inds = hpu.true(people['cin2'])
self.results[f'n_cin2_by_age'][:, idx] = np.histogram(people.age[cin2inds], bins=people.age_bin_edges, weights=people.scale[cin2inds])[0]
cin3inds = hpu.true(people['cin3'])
self.results[f'n_cin3_by_age'][:, idx] = np.histogram(people.age[cin3inds], bins=people.age_bin_edges, weights=people.scale[cin3inds])[0]
self.results['n_females_infectious_by_age'][:, idx] = np.histogram(people.age[f_infinds], bins=people.age_bin_edges, weights=people.scale[f_infinds])[0]
self.results['n_infectious_by_age'][:, idx] = np.histogram(people.age[infinds], bins=people.age_bin_edges, weights=people.scale[infinds])[0]
self.results['n_susceptible_by_age'][:, idx] = np.histogram(people.age[susinds], bins=people.age_bin_edges, weights=people.scale[susinds])[0]
self.results['n_transformed_by_age'][:, idx] = np.histogram(people.age[transformedinds], bins=people.age_bin_edges, weights=people.scale[transformedinds])[0]
self.results['n_precin_by_age'][:, idx] = np.histogram(people.age[precininds], bins=people.age_bin_edges, weights=people.scale[precininds])[0]
self.results['n_cin1_by_age'][:, idx] = np.histogram(people.age[cin1inds], bins=people.age_bin_edges, weights=people.scale[cin1inds])[0]
self.results['n_cin2_by_age'][:, idx] = np.histogram(people.age[cin2inds], bins=people.age_bin_edges, weights=people.scale[cin2inds])[0]
self.results['n_cin3_by_age'][:, idx] = np.histogram(people.age[cin3inds], bins=people.age_bin_edges, weights=people.scale[cin3inds])[0]

# Create total stocks
for key in self.people.meta.genotype_stock_keys:
Expand Down Expand Up @@ -1156,9 +1154,20 @@ def safedivide(num,denom):
self.results['cum_cancer_treated'][:] = np.cumsum(self.results['new_cancer_treatments'][:], axis=0)

return


def compute_summary(self, t=None, update=True, output=False, require_run=False):


def compute_age_mean(self, reskey, t=None):
if t is None:
t = -1
assert 'by_age' in reskey, 'This method can only be used for age results'
res = self.results[reskey][:,t]
edges = self['age_bin_edges']
mean_edges = edges[:-1] + np.diff(edges)/2
age_mean = ((res/res.sum())*mean_edges).sum()
return age_mean


def compute_summary(self, t=None, update=True, output=False, full=False, require_run=False):
'''
Compute the summary dict and string for the sim. Used internally; see
sim.summarize() for the user version.
Expand All @@ -1177,17 +1186,30 @@ def compute_summary(self, t=None, update=True, output=False, require_run=False):
errormsg = 'Simulation not yet run'
raise RuntimeError(errormsg)

s = sc.odict()
s['total HPV infections'] = self.results['infections'].sum()
s['total cancers'] = self.results['cancers'].sum()
s['total cancer deaths'] = self.results['cancer_deaths'].sum()
s['mean HPV prevalence (%)'] = self.results['hpv_prevalence'].mean()*100
s['mean cancer incidence (per 100k)'] = self.results['cancer_incidence'].mean()
s['mean age of infection (years)'] = self.compute_age_mean('infections_by_age', t=t)
s['mean age of cancer (years)'] = self.compute_age_mean('cancers_by_age', t=t)

summary = sc.objdict()
for key in self.result_keys('total'):
summary[key] = self.results[key][t]

# Update the stored state
if update:
self.short_summary = s
self.summary = summary

# Optionally return
if output:
return summary
if full:
return summary
else:
return s
else:
return

Expand Down Expand Up @@ -1219,16 +1241,13 @@ def summarize(self, full=False, t=None, sep=None, output=False):
if sep is None: sep = hpo.sep # Default separator
labelstr = f' "{self.label}"' if self.label else ''
string = f'Simulation{labelstr} summary:\n'
for key in self.result_keys('total'):
val = summary[key]
printval = f' {val:10,.0f} '
label = self.results[key].name.lower().replace(',', sep)
if 'incidence' in key or 'prevalence' in key:
if key in ['hpv_prevalence', 'hpv_incidence']:
printval = f' {val*100:10.2f} '
label += ' (/100)'
else:
label += ' (/100,000)'
for label,val in summary.items():
if 'total' in label:
printval = f' {val:13,.0f} '
elif 'mean' in label:
printval = f' {val:13,.2f} '
else:
raise NotImplementedError
string += printval + label + '\n'

# Print or return string
Expand Down Expand Up @@ -1263,7 +1282,7 @@ class AlreadyRunError(RuntimeError):
pass


def diff_sims(sim1, sim2, skip_key_diffs=False, skip=None, output=False, die=False):
def diff_sims(sim1, sim2, skip_key_diffs=False, skip=None, full=False, output=False, die=False):
'''
Compute the difference of the summaries of two simulations, and print any
values which differ.
Expand All @@ -1273,6 +1292,7 @@ def diff_sims(sim1, sim2, skip_key_diffs=False, skip=None, output=False, die=Fal
sim2 (sim/dict): ditto
skip_key_diffs (bool): whether to skip keys that don't match between sims
skip (list): a list of values to skip
full (bool): whether to use the full summary (else, brief)
output (bool): whether to return the output as a string (otherwise print)
die (bool): whether to raise an exception if the sims don't match
require_run (bool): require that the simulations have been run
Expand All @@ -1285,9 +1305,9 @@ def diff_sims(sim1, sim2, skip_key_diffs=False, skip=None, output=False, die=Fal
'''

if isinstance(sim1, Sim):
sim1 = sim1.compute_summary(update=False, output=True, require_run=True)
sim1 = sim1.compute_summary(update=False, output=True, require_run=True, full=full)
if isinstance(sim2, Sim):
sim2 = sim2.compute_summary(update=False, output=True, require_run=True)
sim2 = sim2.compute_summary(update=False, output=True, require_run=True, full=full)
for sim in [sim1, sim2]:
if not isinstance(sim, dict): # pragma: no cover
errormsg = f'Cannot compare object of type {type(sim)}, must be a sim or a sim.summary dict'
Expand Down
2 changes: 1 addition & 1 deletion hpvsim/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

__all__ = ['__version__', '__versiondate__', '__license__']

__version__ = '1.2.6'
__version__ = '1.2.7'
__versiondate__ = '2023-09-22'
__license__ = f'HPVsim {__version__} ({__versiondate__}) — © 2023 by IDM'
8 changes: 1 addition & 7 deletions tests/test_baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import numpy as np
import sciris as sc
import hpvsim as hpv
import pandas as pd

do_plot = 1
do_save = 0
Expand All @@ -15,9 +14,6 @@
parameters_filename = sc.thisdir(hpv.__file__, 'regression', f'pars_v{hpv.__version__}.json')
hpv.options.set(interactive=False) # Assume not running interactively

testdir = sc.thisdir(aspath=True)
tempdir = testdir/'temp'
tempdir.mkdir(exist_ok=True)

def make_sim(use_defaults=False, do_plot=False, **kwargs):
'''
Expand Down Expand Up @@ -101,12 +97,10 @@ def test_baseline():

# Calculate new baseline
new = make_sim()
new.to_json(tempdir/'baseline_pre_run.json') # Check that exporting works before results have been generated
new.run()
new.to_json(tempdir/'baseline_post_run.json') # Check that exporting works before results have been generated

# Compute the comparison
hpv.diff_sims(old, new, die=True)
hpv.diff_sims(old, new, full=True, die=True)

return new

Expand Down

0 comments on commit 9fe3aed

Please sign in to comment.