-
Notifications
You must be signed in to change notification settings - Fork 0
/
FileCompression.sh
82 lines (75 loc) · 3.66 KB
/
FileCompression.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env bash
################################ Parameters ###################################
# Root directory that is searched recursively (with `find`) for files to
# compress.
data_dir='/data/lab'

# File extensions to process. "sam" is handled separately: those files are
# converted to BAM with samtools; every other extension is gzipped with pigz.
filetype_tocompress=(
  'fastq'
  'fq'
  'vcf'
  'sam'
)

# Thread count handed to `samtools view -@` and `pigz -p`.
threads='16'

# When set to "TRUE", a status line is appended to /etc/motd on failure and on
# successful completion.
broadcast='TRUE'
###############################################################################
# Verify the external tools before touching any data.
# samtools is always required (as before). pigz is required as soon as at
# least one non-"sam" file type is configured, because those files are gzipped
# with pigz further down; without this check a missing pigz would only show up
# as "command not found" lines in the log.
# `command -v` looks the tool up without actually executing it.
required_tools=(samtools)
for filetype in "${filetype_tocompress[@]}"; do
  if [[ -n $filetype && $filetype != "sam" ]]; then
    required_tools+=(pigz)
    break
  fi
done

for tool in "${required_tools[@]}"; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo -e "Cannot find the command ${tool}.\n"
    exit 1
  fi
done
unset required_tools tool
####### Start processing #######
# The log file lives next to this script. The inner command substitution and
# every use of $logfile are quoted so that a script path containing whitespace
# neither breaks `dirname` nor produces an "ambiguous redirect".
script_folder=$(dirname "$(readlink -f "$0")")
logfile="${script_folder}/FileCompression.log"

# Case-insensitive pattern used later to spot failures in tool output.
error_pattern="(error)|(fatal)|(corrupt)|(interrupt)|(EOFException)|(no such file or directory)"

# Reset bash's built-in timer; it is read at the end to report elapsed time.
SECONDS=0

# Append the run header (stdout and stderr) to the log.
{
  echo -e "****************** Start Compression ******************"
  echo -e ">>> Compression start at $(date +'%Y-%m-%d %H:%M:%S')"
  echo -e "File types to compress: ${filetype_tocompress[*]}\n"
} &>>"$logfile"
#######################################
# Convert one SAM file to BAM and delete the SAM only after a verified success.
# Globals:   logfile, threads, error_pattern (read)
# Arguments: $1 - path of the SAM file
# Outputs:   appends progress messages and the samtools output to both
#            $logfile and "<sam file>.compress.log"
# Returns:   0 when the BAM was written and the SAM removed, 1 otherwise
#######################################
convert_sam_to_bam() {
  local sam_file=$1
  # Only a lower-case ".sam" suffix is stripped; any other spelling keeps its
  # suffix in front of ".bam" (same naming as before).
  local bam_file="${sam_file%.sam}.bam"
  local file_log="${sam_file}.compress.log"
  local tool_output status

  # tee feeds the per-file log, its stdout feeds the global log. (Redirecting
  # the command itself into $logfile before the pipe would leave tee with no
  # input and the per-file log empty.)
  printf '*** The file will be convert to BAM:\n%s\n' "$sam_file" \
    | tee -a "$file_log" >>"$logfile"

  # Declaration and assignment are separate so $? is the samtools status.
  tool_output=$(samtools view -@ "$threads" -Shb "$sam_file" -o "$bam_file" 2>&1)
  status=$?
  if [[ -n $tool_output ]]; then
    printf '%s\n' "$tool_output" | tee -a "$file_log" >>"$logfile"
  fi

  # Failure = non-zero exit, missing/empty BAM, or an error keyword in the
  # output of THIS run. Only the fresh tool output is scanned, so path names
  # or entries left in the per-file log by earlier runs cannot trigger it.
  if ((status != 0)) || [[ ! -s $bam_file ]] \
    || grep -qiP -- "$error_pattern" <<<"$tool_output"; then
    printf 'ERROR! SAM-to-BAM conversion failed:\n%s\n' "$sam_file" \
      | tee -a "$file_log" >>"$logfile"
    return 1
  fi

  rm -f -- "$sam_file"
  printf 'SAM-to-BAM conversion completed. New bam file:\n%s\n' "$bam_file" \
    | tee -a "$file_log" >>"$logfile"
  return 0
}

# SAM stage: runs only when "sam" is one of the configured file types.
if [[ " ${filetype_tocompress[*]} " == *" sam "* ]]; then
  # Remove exactly the "sam" entry (and empty entries) so the gzip stage does
  # not handle SAM files; other extensions are left untouched even if they
  # contain the letters "sam".
  remaining_types=()
  for filetype in "${filetype_tocompress[@]}"; do
    [[ -n $filetype && $filetype != "sam" ]] && remaining_types+=("$filetype")
  done
  filetype_tocompress=("${remaining_types[@]}")
  unset remaining_types

  # Collect "*.sam" files (case-insensitive, literal dot). NUL-delimited so
  # paths containing spaces or newlines stay intact.
  sam_files=()
  while IFS= read -r -d '' file; do
    sam_files+=("$file")
  done < <(find "$data_dir" -type f -iname '*.sam' -print0)

  for file in "${sam_files[@]}"; do
    if [[ ! -L $file && -f $file ]]; then
      if ! convert_sam_to_bam "$file"; then
        if [[ $broadcast == "TRUE" ]]; then
          echo -e ">>> FileCompression(${data_dir}): $(date +'%Y-%m-%d %H:%M:%S') FileCompression failed! Please check the log: $logfile" >>/etc/motd
        fi
        exit 1
      fi
    fi
  done
  unset sam_files
fi
#######################################
# Print (NUL-delimited) every regular file below a directory whose name ends
# in ".<ext>" for one of the given extensions, compared case-insensitively.
# Prints nothing when no (non-empty) extension is given, so an empty
# extension list can never select every file.
# Arguments: $1 - directory to search; $2.. - extensions without the dot
# Outputs:   NUL-terminated paths on stdout
#######################################
list_files_to_gzip() {
  local dir=$1
  shift
  local ext
  local -a name_tests=()
  for ext in "$@"; do
    [[ -n $ext ]] || continue
    if ((${#name_tests[@]} > 0)); then
      name_tests+=(-o)
    fi
    name_tests+=(-iname "*.${ext}")
  done
  ((${#name_tests[@]} > 0)) || return 0
  find "$dir" -type f \( "${name_tests[@]}" \) -print0
}

#######################################
# Gzip one file in place with pigz and verify the result.
# Globals:   logfile, threads, error_pattern (read)
# Arguments: $1 - path of the file to compress
# Outputs:   appends progress messages and the pigz output to both $logfile
#            and "<file>.compress.log"
# Returns:   0 when "<file>.gz" exists after a clean pigz run, 1 otherwise
#######################################
gzip_one_file() {
  local target=$1
  local file_log="${target}.compress.log"
  local tool_output status

  # tee feeds the per-file log, its stdout feeds the global log. (Redirecting
  # the command itself into $logfile before the pipe would leave tee with no
  # input and the per-file log empty.)
  printf '*** The file will be gzipped:\n%s\n' "$target" \
    | tee -a "$file_log" >>"$logfile"

  # Declaration and assignment are separate so $? is the pigz status.
  tool_output=$(pigz -p "$threads" -f "$target" 2>&1)
  status=$?
  if [[ -n $tool_output ]]; then
    printf '%s\n' "$tool_output" | tee -a "$file_log" >>"$logfile"
  fi

  # Only the output of THIS run is scanned for error keywords, so path names
  # or entries from earlier runs in the per-file log cannot trigger a failure.
  if ((status != 0)) || [[ ! -e ${target}.gz ]] \
    || grep -qiP -- "$error_pattern" <<<"$tool_output"; then
    printf 'ERROR! Compression failed:\n%s\n' "$target" \
      | tee -a "$file_log" >>"$logfile"
    return 1
  fi

  printf 'Compression completed. New gzipped file:\n%s\n' "${target}.gz" \
    | tee -a "$file_log" >>"$logfile"
  return 0
}

# Gzip stage: compress every file whose extension is still in
# filetype_tocompress. Paths are read NUL-delimited so names with whitespace
# survive intact.
gzip_targets=()
while IFS= read -r -d '' file; do
  gzip_targets+=("$file")
done < <(list_files_to_gzip "$data_dir" "${filetype_tocompress[@]}")

if ((${#gzip_targets[@]} > 0)); then
  for file in "${gzip_targets[@]}"; do
    if [[ ! -L $file && -f $file ]]; then
      if ! gzip_one_file "$file"; then
        if [[ $broadcast == "TRUE" ]]; then
          echo -e ">>> $(date +'%Y-%m-%d %H:%M:%S') FileCompression(${data_dir}): FileCompression failed! Please check the log: $logfile" >>/etc/motd
        fi
        exit 1
      fi
    fi
  done
else
  echo -e "No file need to be compressed.\nCompression completed.\n" &>>"$logfile"
fi
unset gzip_targets
#######################################
# Render a number of seconds as "Elapsed: <h>hrs <m>min <s>sec".
# Arguments: $1 - elapsed time in whole seconds
# Outputs:   the formatted string on stdout (no trailing newline)
#######################################
format_elapsed() {
  local total=$1
  printf 'Elapsed: %dhrs %dmin %dsec' \
    "$((total / 3600))" "$(((total / 60) % 60))" "$((total % 60))"
}

# SECONDS is bash's running timer for this shell.
ELAPSED=$(format_elapsed "$SECONDS")
echo -e "$ELAPSED" &>>"$logfile"

# Finish time: still printed to stdout as before, and additionally appended to
# the log so the log records when the run ended.
echo -e "$(date +'%Y-%m-%d %H:%M:%S')" | tee -a "$logfile"
echo -e "****************** Compression completed ******************\n\n\n" &>>"$logfile"

# Announce success in the message of the day when broadcasting is enabled.
if [[ $broadcast == "TRUE" ]]; then
  echo -e ">>> $(date +'%Y-%m-%d %H:%M:%S') FileCompression(${data_dir}): FileCompression completed successfully!" >>/etc/motd
fi