Skip to content

Commit

Permalink
Make --H3K27ac an optional parameter. Allow running of ATAC only model
Browse files Browse the repository at this point in the history
  • Loading branch information
jnasser3 committed Apr 17, 2020
1 parent 12cd75a commit e461ef0
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
29 changes: 22 additions & 7 deletions src/neighborhoods.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,12 @@ def annotate_genes_with_features(genes,
merged = genes.merge(tsscounts, on="name", suffixes=['','.TSS1Kb'])

access_col = default_accessibility_feature + ".RPKM.quantile.TSS1Kb"
merged['PromoterActivityQuantile'] = ((0.0001+merged['H3K27ac.RPKM.quantile.TSS1Kb'])*(0.0001+merged[access_col])).rank(method='average', na_option="top", ascending=True, pct=True)

if 'H3K27ac.RPKM.quantile.TSS1Kb' in merged.columns:
merged['PromoterActivityQuantile'] = ((0.0001+merged['H3K27ac.RPKM.quantile.TSS1Kb'])*(0.0001+merged[access_col])).rank(method='average', na_option="top", ascending=True, pct=True)
else:
merged['PromoterActivityQuantile'] = ((0.0001+merged[access_col])).rank(method='average', na_option="top", ascending=True, pct=True)


merged.to_csv(os.path.join(outdir, "GeneList.txt"),
sep='\t', index=False, header=True, float_format="%.6f")
Expand Down Expand Up @@ -544,7 +549,9 @@ def parse_params_file(args):

def get_features(args):
features = {}
features['H3K27ac'] = args.H3K27ac.split(",")

if args.H3K27ac:
features['H3K27ac'] = args.H3K27ac.split(",")

if args.ATAC:
features['ATAC'] = args.ATAC.split(",")
Expand Down Expand Up @@ -573,11 +580,19 @@ def determine_accessibility_feature(args):

def compute_activity(df, access_col):
if access_col == "DHS":
df['activity_base'] = np.sqrt(df['normalized_h3K27ac'] * df['normalized_dhs'])
df['activity_base_no_qnorm'] = np.sqrt(df['H3K27ac.RPM'] * df['DHS.RPM'])
if 'H3K27ac.RPM' in df.columns:
df['activity_base'] = np.sqrt(df['normalized_h3K27ac'] * df['normalized_dhs'])
df['activity_base_no_qnorm'] = np.sqrt(df['H3K27ac.RPM'] * df['DHS.RPM'])
else:
df['activity_base'] = df['normalized_dhs']
df['activity_base_no_qnorm'] = df['DHS.RPM']
elif access_col == "ATAC":
df['activity_base'] = np.sqrt(df['normalized_h3K27ac'] * df['normalized_atac'])
df['activity_base_no_qnorm'] = np.sqrt(df['H3K27ac.RPM'] * df['ATAC.RPM'])
if 'H3K27ac.RPM' in df.columns:
df['activity_base'] = np.sqrt(df['normalized_h3K27ac'] * df['normalized_atac'])
df['activity_base_no_qnorm'] = np.sqrt(df['H3K27ac.RPM'] * df['ATAC.RPM'])
else:
df['activity_base'] = df['normalized_atac']
df['activity_base_no_qnorm'] = df['ATAC.RPM']
else:
raise RuntimeError("At least one of ATAC or DHS must be provided!")

Expand All @@ -589,7 +604,7 @@ def run_qnorm(df, qnorm, qnorm_method = "rank", separate_promoters = True):
# Option to qnorm promoters and nonpromoters separately

if qnorm is None:
df['normalized_h3K27ac'] = df['H3K27ac.RPM']
if 'H3K27ac.RPM' in df.columns: df['normalized_h3K27ac'] = df['H3K27ac.RPM']
if 'DHS.RPM' in df.columns: df['normalized_dhs'] = df['DHS.RPM']
if 'ATAC.RPM' in df.columns: df['normalized_atac'] = df['ATAC.RPM']
else:
Expand Down
2 changes: 1 addition & 1 deletion src/run.neighborhoods.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class formatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpForm
parser.add_argument('--skip_gene_counts', action="store_true", help="Do not count over genes or gene bodies. Will not produce GeneList.txt. Do not use switch if intending to run Predictions")

#epi
parser.add_argument('--H3K27ac', required=required_args, default=None, help="Comma delimited string of H3K27ac .bam files")
parser.add_argument('--H3K27ac', default="", nargs='?', help="Comma delimited string of H3K27ac .bam files")
parser.add_argument('--DHS', default="", nargs='?', help="Comma delimited string of DHS .bam files. Either ATAC or DHS must be provided")
parser.add_argument('--ATAC', default="", nargs='?', help="Comma delimited string of ATAC .bam files. Either ATAC or DHS must be provided")
parser.add_argument('--default_accessibility_feature', default=None, nargs='?', help="If both ATAC and DHS are provided, this flag must be set to either 'DHS' or 'ATAC' signifying which datatype to use in computing activity")
Expand Down

0 comments on commit e461ef0

Please sign in to comment.