forked from stevekm/AutoReportLite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
workflow.Rmd
88 lines (66 loc) · 2.61 KB
/
workflow.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
---
title: "Workflow"
author: "Stephen Kelly"
date: "4/29/2016"
output: html_document
---
```{r,engine='bash'}
# Submit one cluster job per sample listed in a tab-separated sample sheet.
#
# How to run: copy/paste these commands into a terminal (bash shell), compile
# this Rmd document with something like RStudio, or save the steps as a bash
# script; pick whichever is easiest for you.
#
# Because this code may be pasted into an interactive shell, errors are
# reported with `return` from a function instead of `exit` / `set -e`, which
# would close the terminal session.

# path to the analysis project directory
ProjDir="$HOME/AutoReportLite"
# path to dir for pipeline outputs
testOutdir="$ProjDir/analysis_pipeline"
# path to sample sheet
samplesheet="$ProjDir/sample-sheet.tsv"
# path to pipeline script
tmp_script="$ProjDir/code/analysis_pipeline.sh"

#######################################
# Read a sample sheet (skipping its header line) and qsub the pipeline script
# once for every non-blank row.
# Sample sheet columns used (tab-separated):
#   1 = sample ID, 3 = reference genome, 4 = first read, 5 = second read
# Arguments:
#   $1 - path to the sample sheet
#   $2 - directory for pipeline outputs (created if missing); each sample
#        gets "<outdir>/<sample>" and "<outdir>/<sample>/logs"
#   $3 - path to the pipeline script (made executable)
#   $4 - optional root of the reference genome tree
#        (default: /local/data/iGenomes/Mus_musculus/UCSC)
# Outputs:
#   progress messages on stdout, diagnostics on stderr
# Returns:
#   0 if every row was submitted, 1 if the inputs are unusable or any row
#   could not be submitted
#######################################
submit_sample_jobs() {
  local sheet="$1" outdir="$2" script="$3"
  local genome_root="${4:-/local/data/iGenomes/Mus_musculus/UCSC}"
  local line sample sample_outdir logdir genome genome_path fastq1 fastq2
  local failures=0

  # validate the inputs before creating anything on disk
  if [[ ! -r "$sheet" ]]; then
    printf 'ERROR: cannot read sample sheet: %s\n' "$sheet" >&2
    return 1
  fi
  if [[ ! -f "$script" ]]; then
    printf 'ERROR: pipeline script not found: %s\n' "$script" >&2
    return 1
  fi

  # make sure the script is executable; make the outdir
  chmod +x "$script" || return 1
  mkdir -p "$outdir" || return 1

  # read in lines from the sample sheet, skipping the header.
  # `IFS= read -r` keeps backslashes and leading tabs intact, and the
  # `|| [[ -n ... ]]` clause keeps a final row that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # tolerate Windows (CRLF) line endings
    line="${line%$'\r'}"

    # make sure there is an entry on the line
    if [[ -z "${line//[[:space:]]/}" ]]; then
      continue
    fi

    # get sample ID; `cut` is used (rather than `IFS=$'\t' read`) because it
    # does not collapse empty columns
    sample=$(cut -f1 <<<"$line")
    if [[ -z "$sample" ]]; then
      printf 'WARNING: skipping row with empty sample ID: %s\n' "$line" >&2
      failures=$((failures + 1))
      continue
    fi
    printf 'tmp_sample is %s\n' "$sample"

    # make a subdir for the sample, and a subdir for that sample's logs
    sample_outdir="${outdir}/${sample}"
    logdir="${sample_outdir}/logs"
    if ! mkdir -p "$logdir"; then
      printf 'WARNING: cannot create %s; skipping sample %s\n' "$logdir" "$sample" >&2
      failures=$((failures + 1))
      continue
    fi
    printf 'tmp_outdir is %s\n' "$sample_outdir"

    # get the reference genome
    genome=$(cut -f3 <<<"$line")
    printf 'tmp_genome is %s\n' "$genome"

    # get the full path to the reference genome; set this as needed based on
    # the alignment program to be used
    genome_path="${genome_root}/${genome}/Sequence/BowtieIndex/genome"
    printf '%s\n' "$genome_path"

    # get the first read
    fastq1=$(cut -f4 <<<"$line")
    printf 'tmp_fastq1 is %s\n' "$fastq1"

    # get the second read
    fastq2=$(cut -f5 <<<"$line")
    printf 'tmp_fastq2 is %s\n' "$fastq2"

    # submit job to be run on the cluster
    # args:
    #   -wd : job's working dir
    #   -o : stdout log directory
    #   -e : stderr log directory
    #   -pe threaded <integer> : CPU threads allocated
    #   -l mem_free=<int>G : only run job on node with <int> Gigabytes free memory
    #   -l mem_token=<int>G : reserve <int> Gigabytes of memory for job
    #   -N <name> : name for the job (not used here)
    # stdin is redirected from /dev/null so qsub cannot consume the remaining
    # sample-sheet lines that feed this loop.
    if ! qsub -wd "$sample_outdir" -o ":${logdir}/" -e ":${logdir}/" \
      -pe threaded 6-16 -l mem_free=10G -l mem_token=10G \
      "$script" "$sample_outdir" "$fastq1" "$fastq2" \
      "$sample" "$genome" "$genome_path" </dev/null; then
      printf 'WARNING: qsub failed for sample %s\n' "$sample" >&2
      failures=$((failures + 1))
    fi

    printf '\n\n\n'
  done < <(tail -n +2 -- "$sheet")

  if ((failures > 0)); then
    printf 'ERROR: %d sample row(s) could not be submitted\n' "$failures" >&2
    return 1
  fi
  return 0
}

# switch to the project dir, then submit the jobs
if cd "$ProjDir"; then
  submit_sample_jobs "$samplesheet" "$testOutdir" "$tmp_script"
else
  printf 'ERROR: cannot switch to project dir: %s\n' "$ProjDir" >&2
fi
```