From f47dbddf17930204f22481d564168d4168130292 Mon Sep 17 00:00:00 2001
From: Ian Fiddes <ian.fiddes@inscripta.com>
Date: Tue, 24 Nov 2020 08:28:02 -0800
Subject: [PATCH] Docs, missing clusterGenes fix. For issue #213 and update to
 PR #215.

---
 README.md              | 13 +++++++++++++
 cat/filter_transmap.py |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 5222631c..4813cdb5 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,19 @@ As described above, the primary method to executing the pipeline is to follow th
 
 `--workers`: Number of local cores to use. If running `toil` in singleMachine mode, care must be taken with the balance of this value and the `--maxCores` parameter.
 
+## Augustus config options
+
+The augustus config files for all of the modes live in the CAT folder under `augustus_cfgs`. If you are running CAT from a folder that is not the installation folder, you will need to point CAT to these files directly.
+
+`--tm-cfg`: Config file for AugustusTM. Defaults to `augustus_cfgs/extrinsic.ETM1.cfg`.
+
+`--tmr-cfg`: Config file for AugustusTMR. Defaults to `augustus_cfgs/extrinsic.ETM2.cfg`.
+
+`--augustus-cgp-cfg-template`: Config file template for AugustusCGP. Defaults to `augustus_cfgs/cgp_extrinsic_template.cfg`.
+
+`--pb-cfg`: Config file for AugustusPB. Defaults to `augustus_cfgs/extrinsic.M.RM.PB.E.W.cfg`.
+
+
 ## transMap options
 `--global-near-best`: Adjusts the `globalNearBest` parameter passed to `pslCDnaFilter`. Defaults to 0.15. The `globalNearBest` algorithm determines which set of alignments are within a certain distance of the highest scoring alignment for a given source transcript. Making this value smaller will increase the number of alignments filtered out, decreasing the apparent paralogous alignment rate. Alignments which survive this filter are putatively paralogous. 
 
diff --git a/cat/filter_transmap.py b/cat/filter_transmap.py
index 610de3d4..54b3b1ce 100644
--- a/cat/filter_transmap.py
+++ b/cat/filter_transmap.py
@@ -148,10 +148,10 @@ def hash_aln(aln, aln_id):
                     tools.fileOps.print_row(out_coding, tx.get_gene_pred())
                 else:
                     tools.fileOps.print_row(out_noncoding, tx.get_gene_pred())
-        cmd = ['clusterGenes', '-cds', f'-minOverlappingBases={overlapping_ignore_bases}',
+        cmd = ['clusterGenes', '-cds', f'-ignoreBases={overlapping_ignore_bases}',
                coding_tmp, 'no', coding_clusters]
         tools.procOps.run_proc(cmd)
-        cmd = ['clusterGenes', f'-minOverlappingBases={overlapping_ignore_bases}',
+        cmd = ['clusterGenes', f'-ignoreBases={overlapping_ignore_bases}',
                noncoding_tmp, 'no', noncoding_clusters]
         tools.procOps.run_proc(cmd)
         coding_clustered = pd.read_csv(coding_tmp, sep='\t')