Skip to content

Commit

Permalink
Merge pull request #23 from calico/revision-upd-3
Browse files Browse the repository at this point in the history
Revision update to some scripts and example notebooks.
  • Loading branch information
johli authored Jul 12, 2024
2 parents 0c28e87 + 37e9d5c commit eeee3fe
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 98 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ These repositories further depend on a number of python packages (which are auto
A new conda environment can be created with `conda create -n borzoi_py39 python=3.9`.

### Model Availability
The model weights can be downloaded as .h5 files from the following URLs:
The model weights can be downloaded as .h5 files from the URLs below. We trained a total of 4 model replicates with identical train, validation and test splits (test = fold3, validation = fold4 from [sequences_human.bed.gz](https://github.com/calico/borzoi/blob/main/data/sequences_human.bed.gz)).

[Borzoi V2 Cross-fold 0](https://storage.googleapis.com/seqnn-share/borzoi/f0/model0_best.h5)<br/>
[Borzoi V2 Cross-fold 1](https://storage.googleapis.com/seqnn-share/borzoi/f1/model0_best.h5)<br/>
[Borzoi V2 Cross-fold 2](https://storage.googleapis.com/seqnn-share/borzoi/f2/model0_best.h5)<br/>
[Borzoi V2 Cross-fold 3](https://storage.googleapis.com/seqnn-share/borzoi/f3/model0_best.h5)<br/>
[Borzoi V2 Replicate 0](https://storage.googleapis.com/seqnn-share/borzoi/f0/model0_best.h5)<br/>
[Borzoi V2 Replicate 1](https://storage.googleapis.com/seqnn-share/borzoi/f1/model0_best.h5)<br/>
[Borzoi V2 Replicate 2](https://storage.googleapis.com/seqnn-share/borzoi/f2/model0_best.h5)<br/>
[Borzoi V2 Replicate 3](https://storage.googleapis.com/seqnn-share/borzoi/f3/model0_best.h5)<br/>

#### Mini Borzoi Models
We have trained a collection of (smaller) model instances on various subsets of data modalities (or on all data modalities but with architectural changes compared to the original architecture). For example, some models are trained only on RNA-seq data while others are trained on DNase-, ATAC- and RNA-seq. Similarly, some model instances are trained on human-only data while others are trained on human- and mouse data. The models are available at the URL below:
We have trained a collection of (smaller) model instances on various subsets of data modalities (or on all data modalities but with architectural changes compared to the original architecture). For example, some models are trained only on RNA-seq data while others are trained on DNase-, ATAC- and RNA-seq. Similarly, some model instances are trained on human-only data while others are trained on human and mouse data. The models were trained with either 2- or 4-fold cross-validation and are available at the following URL:

[Mini Borzoi Model Collection](https://storage.googleapis.com/seqnn-share/borzoi/mini/)<br/>

Expand Down
34 changes: 17 additions & 17 deletions examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,13 @@
"%%bash\n",
"\n",
"#Download model weights\n",
"for fold in f0 f1 f2 f3; do\n",
" mkdir -p \"saved_models/$fold/\"\n",
" local_model=\"saved_models/$fold/model0_best.h5\"\n",
"for rep in f0 f1 f2 f3; do\n",
" mkdir -p \"saved_models/$rep/\"\n",
" local_model=\"saved_models/$rep/model0_best.h5\"\n",
" if [ -f \"$local_model\" ]; then\n",
" echo \"$fold model already exists.\"\n",
" echo \"$rep model already exists.\"\n",
" else\n",
" wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$fold/model0_best.h5\" -O \"$local_model\"\n",
" wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$rep/model0_best.h5\" -O \"$local_model\"\n",
" fi\n",
"done\n",
"\n",
Expand Down Expand Up @@ -150,7 +150,7 @@
"targets_file = 'targets_gtex.txt' #Subset of targets_human.txt\n",
"\n",
"seq_len = 524288\n",
"n_folds = 1 #To use only one model fold, set to 'n_folds = 1'. To use all four folds, set 'n_folds = 4'.\n",
"n_reps = 1 #To use only one model replicate, set to 'n_reps = 1'. To use all four replicates, set 'n_reps = 4'.\n",
"rc = True #Average across reverse-complement prediction\n",
"\n",
"#Read model parameters\n",
Expand Down Expand Up @@ -179,16 +179,16 @@
"#Initialize model ensemble\n",
"\n",
"models = []\n",
"for fold_ix in range(n_folds) :\n",
"for rep_ix in range(n_reps) :\n",
" \n",
" model_file = \"saved_models/f\" + str(fold_ix) + \"/model0_best.h5\"\n",
" model_file = \"saved_models/f\" + str(rep_ix) + \"/model0_best.h5\"\n",
"\n",
" seqnn_model = seqnn.SeqNN(params_model)\n",
" seqnn_model.restore(model_file, 0)\n",
" seqnn_model.build_slice(target_index)\n",
" if rc :\n",
" seqnn_model.strand_pair.append(slice_pair)\n",
" seqnn_model.build_ensemble(rc, '0')\n",
" seqnn_model.build_ensemble(rc, [0])\n",
" \n",
" models.append(seqnn_model)\n"
]
Expand Down Expand Up @@ -370,7 +370,7 @@
"source": [
"%%time\n",
"#Predict for chr10_116952944_T_C\n",
"# (~6 minutes on CPU w 1 fold; ~2 minutes on GPU)\n",
"# (~6 minutes on CPU w 1 replicate; ~2 minutes on GPU)\n",
"\n",
"save_figs = False\n",
"save_suffix = '_chr10_116952944_T_C'\n",
Expand Down Expand Up @@ -492,7 +492,7 @@
" use_ratio=False,\n",
" use_logodds=False,\n",
" subtract_avg=True,\n",
" fold_index=np.arange(n_folds).tolist(),\n",
" fold_index=np.arange(n_reps).tolist(),\n",
")\n"
]
},
Expand Down Expand Up @@ -546,7 +546,7 @@
" plot_start=(poses[0] - start) - 64,\n",
" plot_end=(poses[0] - start) + 64,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_folds_gtex_blood_cov_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_blood_cov_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -642,7 +642,7 @@
" plot_start=(poses[0] - start) - 64,\n",
" plot_end=(poses[0] - start) + 64,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_folds_gtex_blood_cov_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_reps_gtex_blood_cov_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -740,7 +740,7 @@
" plot_start=(poses[0] - start) - 64,\n",
" plot_end=(poses[0] - start) + 64,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_folds_gtex_blood_cov_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_blood_cov_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -886,9 +886,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:borzoi_py39_2]",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "conda-env-borzoi_py39_2-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -900,7 +900,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
"version": "3.9.13"
}
},
"nbformat": 4,
Expand Down
42 changes: 21 additions & 21 deletions examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,13 @@
"%%bash\n",
"\n",
"#Download model weights\n",
"for fold in f0 f1 f2 f3; do\n",
" mkdir -p \"saved_models/$fold/\"\n",
" local_model=\"saved_models/$fold/model0_best.h5\"\n",
"for rep in f0 f1 f2 f3; do\n",
" mkdir -p \"saved_models/$rep/\"\n",
" local_model=\"saved_models/$rep/model0_best.h5\"\n",
" if [ -f \"$local_model\" ]; then\n",
" echo \"$fold model already exists.\"\n",
" echo \"$rep model already exists.\"\n",
" else\n",
" wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$fold/model0_best.h5\" -O \"$local_model\"\n",
" wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$rep/model0_best.h5\" -O \"$local_model\"\n",
" fi\n",
"done\n",
"\n",
Expand Down Expand Up @@ -150,7 +150,7 @@
"targets_file = 'targets_gtex.txt' #Subset of targets_human.txt\n",
"\n",
"seq_len = 524288\n",
"n_folds = 1 #To use only one model fold, set to 'n_folds = 1'. To use all four folds, set 'n_folds = 4'.\n",
"n_reps = 1 #To use only one model replicate, set to 'n_reps = 1'. To use all four replicates, set 'n_reps = 4'.\n",
"rc = True #Average across reverse-complement prediction\n",
"\n",
"#Read model parameters\n",
Expand Down Expand Up @@ -179,16 +179,16 @@
"#Initialize model ensemble\n",
"\n",
"models = []\n",
"for fold_ix in range(n_folds) :\n",
"for rep_ix in range(n_reps) :\n",
" \n",
" model_file = \"saved_models/f\" + str(fold_ix) + \"/model0_best.h5\"\n",
" model_file = \"saved_models/f\" + str(rep_ix) + \"/model0_best.h5\"\n",
"\n",
" seqnn_model = seqnn.SeqNN(params_model)\n",
" seqnn_model.restore(model_file, 0)\n",
" seqnn_model.build_slice(target_index)\n",
" if rc :\n",
" seqnn_model.strand_pair.append(slice_pair)\n",
" seqnn_model.build_ensemble(rc, '0')\n",
" seqnn_model.build_ensemble(rc, [0])\n",
" \n",
" models.append(seqnn_model)\n"
]
Expand Down Expand Up @@ -329,7 +329,7 @@
"source": [
"%%time\n",
"#Predict for chr10_116664061_G_A\n",
"# (~6 minutes on CPU w 1 fold; ~15 seconds on GPU)\n",
"# (~6 minutes on CPU w 1 replicate; ~15 seconds on GPU)\n",
"\n",
"save_figs = False\n",
"save_suffix = '_chr10_116664061_G_A'\n",
Expand Down Expand Up @@ -518,7 +518,7 @@
" use_ratio=True,\n",
" use_logodds=False,\n",
" subtract_avg=True,\n",
" fold_index=np.arange(n_folds).tolist(),\n",
" fold_index=np.arange(n_reps).tolist(),\n",
")\n"
]
},
Expand Down Expand Up @@ -572,7 +572,7 @@
" plot_start=seq_len - (poses[0] - start) - 64 - 1,\n",
" plot_end=seq_len - (poses[0] - start) + 64 - 1,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_folds_gtex_cov_nerve_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_cov_nerve_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -667,7 +667,7 @@
" plot_start=seq_len - (poses[0] - start) - 64 - 1,\n",
" plot_end=seq_len - (poses[0] - start) + 64 - 1,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_folds_gtex_cov_nerve_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_reps_gtex_cov_nerve_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -764,7 +764,7 @@
" plot_start=seq_len - (poses[0] - start) - 64 - 1,\n",
" plot_end=seq_len - (poses[0] - start) + 64 - 1,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_folds_gtex_cov_nerve_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_cov_nerve_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -825,7 +825,7 @@
" use_ratio=True,\n",
" use_logodds=False,\n",
" subtract_avg=True,\n",
" fold_index=np.arange(n_folds).tolist(),\n",
" fold_index=np.arange(n_reps).tolist(),\n",
")\n"
]
},
Expand Down Expand Up @@ -879,7 +879,7 @@
" plot_start=seq_len - (pas_pos - start) - 64 - 1,\n",
" plot_end=seq_len - (pas_pos - start) + 64 - 1,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_folds_gtex_pas_cov_nerve_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_pas_cov_nerve_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -974,7 +974,7 @@
" plot_start=seq_len - (pas_pos - start) - 64 - 1,\n",
" plot_end=seq_len - (pas_pos - start) + 64 - 1,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_folds_gtex_pas_cov_nerve_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_reps_gtex_pas_cov_nerve_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -1073,7 +1073,7 @@
" plot_start=seq_len - (pas_pos - start) - 64 - 1,\n",
" plot_end=seq_len - (pas_pos - start) + 64 - 1,\n",
" save_figs=False,\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_folds_gtex_pas_cov_nerve_undo_clip'\n",
" fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_pas_cov_nerve_undo_clip'\n",
")\n"
]
},
Expand Down Expand Up @@ -1216,9 +1216,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:borzoi_py39_2]",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "conda-env-borzoi_py39_2-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -1230,7 +1230,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
"version": "3.9.13"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit eeee3fe

Please sign in to comment.