-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_seqs.sh
54 lines (40 loc) · 2.12 KB
/
get_seqs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env bash
#Author: Olabiyi Aderemi Obayomi
#created: February 2018
#email: obadbotanist@yahoo.com
#This scripts generates a fasta file of whatever you are looking for, from a QIIME formatted OTU table
#I suggest that you open a directory for the analysis
#if you will be analysing for pathogens for example make sure you have the following files
#1a human_pathogens.txt - a text file containing the list of human pathogen genera to be found
# Or 1b. plant_pathogens.txt - a text file containing the list of plant pathogen genera to be found
#2 otu_table.biom - a biom table containg the list of all the OTUs and there abundance values
#3 seqs_otus.txt - a tab delimited text file containing the map of all the OTUs to their related sequences
#4 seqs.fna - a fasta file generated after quality filtering and demultiplexing which contains the list of all sequences
#Get the user's inputs and save them in their respective variables
echo "What are you looking for? :"
read search
echo "Enter the full path to the file containing the comma separated list of what you looking for: "
read list
echo "Enter the full path to the OTU.biom file: "
read otu_biom
echo "Enter the full path to the seqs_otus.txt file: "
read seqs_otus
echo "Enter the full path to the seqs.fna file: "
read seqs_fna
list=$(cat $list)
#filter out the OTUs of what you are looking for from the given OTU table e.g. human pathogens
filter_taxa_from_otu_table.py -i $otu_biom -o $search.biom -p $list
#convert biom formattted OTU table to tsv
biom convert -i $search.biom -o $search.biom.tsv --to-tsv --header-key taxonomy --output-metadata-id "Consensus Lineage"
#get the OTU names
#cut the first field, remove the first 2 lines
otus=$(cut -f1 $search.biom.tsv|sed -e 1,2d)
#convert the otus to an array
otus=($otus)
#find each OTU in the OTU map
for otu in ${otus[*]}; do
#create the new OTU map by searching for the exact word match
grep -wE "$otu" $seqs_otus >> $search_map.txt
done
#get the fasta sequences for what you are looking for e.g. human pathogens fasta sequences
filter_fasta.py -f $seqs_fna -o $search.fasta -m $search_map.txt