From f47dbddf17930204f22481d564168d4168130292 Mon Sep 17 00:00:00 2001 From: Ian Fiddes Date: Tue, 24 Nov 2020 08:28:02 -0800 Subject: [PATCH] Docs, missing clusterGenes fix. For issue #213 and update to PR #215. --- README.md | 13 +++++++++++++ cat/filter_transmap.py | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5222631c..4813cdb5 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,19 @@ As described above, the primary method to executing the pipeline is to follow th `--workers`: Number of local cores to use. If running `toil` in singleMachine mode, care must be taken with the balance of this value and the `--maxCores` parameter. +## Augustus config options + +The augustus config files for all of the modes live in the CAT folder under `augustus_cfgs`. If you are running CAT from a folder that is not the installation folder, you will need to point CAT to these files directly. + +`--tm-cfg`: Config file for AugustusTM. Defaults to `augustus_cfgs/extrinsic.ETM1.cfg`. + +`--tmr-cfg`: Config file for AugustusTMR. Defaults to `augustus_cfgs/extrinsic.ETM2.cfg`. + +`--augustus-cgp-cfg-template`: Config file template for AugustusCGP. Defaults to `augustus_cfgs/cgp_extrinsic_template.cfg`. + +`--pb-cfg`: Config file for AugustusPB. Defaults to `augustus_cfgs/extrinsic.M.RM.PB.E.W.cfg`. + + ## transMap options `--global-near-best`: Adjusts the `globalNearBest` parameter passed to `pslCDnaFilter`. Defaults to 0.15. The `globalNearBest` algorithm determines which set of alignments are within a certain distance of the highest scoring alignment for a given source transcript. Making this value smaller will increase the number of alignments filtered out, decreasing the apparent paralogous alignment rate. Alignments which survive this filter are putatively paralogous. 
diff --git a/cat/filter_transmap.py b/cat/filter_transmap.py index 610de3d4..54b3b1ce 100644 --- a/cat/filter_transmap.py +++ b/cat/filter_transmap.py @@ -148,10 +148,10 @@ def hash_aln(aln, aln_id): tools.fileOps.print_row(out_coding, tx.get_gene_pred()) else: tools.fileOps.print_row(out_noncoding, tx.get_gene_pred()) - cmd = ['clusterGenes', '-cds', f'-minOverlappingBases={overlapping_ignore_bases}', + cmd = ['clusterGenes', '-cds', f'-ignoreBases={overlapping_ignore_bases}', coding_tmp, 'no', coding_clusters] tools.procOps.run_proc(cmd) - cmd = ['clusterGenes', f'-minOverlappingBases={overlapping_ignore_bases}', + cmd = ['clusterGenes', f'-ignoreBases={overlapping_ignore_bases}', noncoding_tmp, 'no', noncoding_clusters] tools.procOps.run_proc(cmd) coding_clustered = pd.read_csv(coding_tmp, sep='\t')