-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRiboTaxa_arguments.conf
138 lines (102 loc) · 4.74 KB
/
RiboTaxa_arguments.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
##This configuration file is very important: every parameter needs to be filled in to avoid errors.
##Mandatory parameters are denoted by **. Most of them depend on your computer's capacity/configuration and on your sequencing data.
#Other tool parameters have been optimized and can be left at their default values.
#-----------------------
#Setting up directories
#-----------------------
[BASE]
####Path to the RiboTaxa directory **
RiboTaxa_DIR = /home/user/RiboTaxa
####Path to the data directory, which must contain only raw reads in fastq/fastq.gz format **
###paired-end files should be metagenome_R1.fastq/fastq.gz and metagenome_R2.fastq/fastq.gz
###single-end file should be metagenome.fastq/fastq.gz
DATA_DIR = /home/user/Documents/raw_reads
####Format of the raw read files found in the data directory **
## fastq : if files are not compressed
## fastq.gz : if files are compressed in gz format
FORMAT = fastq
####Path to the output directory **
OUTPUT = /home/user/Documents/RiboTaxa_results
####Number of threads/CPUs to be used throughout the pipeline **
THREAD = 8
[BBMAP]
####RAM limit (in GB) to be used by BBTOOLS/BBMAP during quality control and mapping **
##Depends on the RAM of the computer. Use approximately 80% of the available RAM.
##Example: Available RAM = 16GB, therefore RAM = 80/100*16 = 12.8, i.e. about 12GB
RAM = 12
#-----------------------------
#Quality control using BBTOOLS
#-----------------------------
####Trim reads to remove bases matching adapter sequences (Default value = r)
## f (don't trim)
## r (trim to the right)
## l (trim to the left)
##In ktrim=r mode, once a reference kmer is matched in a read,
##that kmer and all the bases to the right will be trimmed, leaving
##only the bases to the left; this is the normal mode for adapter trimming.
ktrim = r
####Kmer length used for finding adapters (Default value = 21)
kmer = 21
####Reads shorter than this length (in bases) after trimming will be discarded (Default value = 60)
minlength = 60
####Regions with an average quality BELOW this value will be trimmed
trimq = 20
####Trim read ends to remove bases with quality below trimq (Default value = rl)
## rl (trim both ends)
## f (neither end)
## r (right end only)
## l (left end only)
## w (sliding window)
qtrim = rl
####Reads with more Ns than this (after trimming) will be discarded (Default value = 1)
maxns = 1
#------------------------------------
#Filter 16S/18S reads using SortMeRNA
#------------------------------------
####Indexed database directory for SortMeRNA **
##This directory should contain one .clustered.fasta file and several .clustered. files
##NOTE(review): this key has no section header of its own, so it falls under the
##section opened above - confirm that the pipeline reads it from there.
SORTMERNA_DB = /home/user/Documents/Databases/sortmerna_indexed_DB
#----------------------------------------------------------
#Reconstructing 16S/18S sequences using EMIRGE and MetaRIB
#----------------------------------------------------------
[EMIRGE]
####Database directory containing the indexed files for EMIRGE **
##This directory should contain one fasta file and several .ebwt files
EMIRGE_DB = /home/user/Documents/Databases/bowtie_indexed_DB
####Length of the longest reads **
MAX_LENGTH = 300
####Identity threshold (Default value = 1)
##This is the JOIN_THRESHOLD parameter of EMIRGE. If two candidate sequences
##share >= this fractional identity over their bases with mapped reads, then
##the two sequences are merged into one for the next iteration. With this value fixed
##to 1, sequence reconstruction gave the best results on controlled samples (mock or synthetic communities).
IDENTITY = 1
####Number of iterations (Default value = 40)
##Number of iterations performed by EMIRGE for sequence reconstruction.
##The default value suits most metagenomic data. The EMIRGE authors recommend increasing this value for complex communities.
NUM_ITERATION = 40
####Mean insert size **
##Mean of the insert size distribution
MEAN_INSERT_SIZE = 300
####Standard deviation **
##Standard deviation of the insert size distribution
STD_DEV = 100
####Minimum fraction of the length of a candidate
##reference sequence that must be covered by mapped
##reads (Default = 0.3). Range [0.0,1.0]
MIN_COV = 0.3
[METARIB]
####Number of reads subsampled in each iteration (Default = 1000000)
SAMPLING_NUM = 1000000
#------------------------------------------------------------
#Taxonomic classification using sklearn_classifier of qiime2
#------------------------------------------------------------
####Path + file name of the database used by the sklearn classifier **
##NOTE(review): the classifier keys below have no section header of their own, so they
##fall under the section opened above - confirm that the pipeline reads them from there.
SKLEARN_DB = /home/user/Documents/Databases/qiime2020.8_silva138/silva-138-99-nb-classifier.qza
####Confidence threshold for limiting taxonomic depth (Default = 0.7)
##This threshold ensures a high-quality affiliation of the reconstructed sequences.
##We do not recommend lowering this value below 0.7.
CONFIDENCE = 0.7
####Number of reads to process in each batch (Default = auto: 20 000 sequences)
##Use BATCH = 1 if you have less than 16GB of RAM, to avoid errors
BATCH = auto