Skip to content

Commit

Permalink
implement automated unit testing
Browse files Browse the repository at this point in the history
  • Loading branch information
kheber committed Aug 6, 2024
1 parent 3a16b38 commit 3bd3997
Show file tree
Hide file tree
Showing 22,182 changed files with 79,087 additions and 21,907 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
57 changes: 57 additions & 0 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# CI workflow "Tests": runs three independent jobs on every push to main and
# on every pull request targeting main.
name: Tests

# Triggers: pushes and pull requests against the main branch only.
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
# Job 1: checks out the repository and runs the `test` target of src/Makefile.
test_preprocessr:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: make test
run: make -C src test

# Job 2: sets up R with the tidyverse and optparse packages, runs src/Plot.R
# on tests/inputs/ with prefix "testing", and diffs the generated
# testing_abundance_summary.tsv against the expected file before removing it.
test_strainr:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2
- uses: r-lib/actions/setup-r-dependencies@v2
with:
cache-version: 1
r-version: 'release'
packages: |
any::tidyverse
any::optparse
- name: Plot.R test
run: |
./src/Plot.R -a tests/inputs/ -i tests/inputs/ -p testing
diff tests/inputs/testing_abundance_summary.tsv \
tests/expected_output/expected_abundance_summary.tsv
rm tests/inputs/testing_abundance_summary.tsv
shell: bash

# Job 3: end-to-end run. Installs strainr2 from bioconda/conda-forge, then
# force-removes the strainr2 package itself (presumably so that only its
# dependencies remain and the binaries under test come from this checkout --
# TODO confirm), builds with `make -C src release`, prepends src/ to PATH, and
# runs PreProcessR followed by StrainR. Each output file is sorted and diffed
# against its counterpart in tests/expected_output/.
test_comprehensive:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: conda-incubator/setup-miniconda@v3
# NOTE(review): `bash -el` starts a login shell; presumably required so the
# conda environment set up by the previous step is active -- confirm.
- name: test run comp
shell: bash -el {0}
run: |
conda install -c bioconda -c conda-forge -y strainr2
conda remove --force -y strainr2
make -C src release
export PATH="$(pwd)/src/:$PATH"
PreProcessR -i tests/genomes/mixture -o tests/StrainR2DB_testing
diff <(sort tests/StrainR2DB_testing/KmerContent.report) <(sort tests/expected_output/KmerContent_comprehensive.report)
StrainR -1 tests/inputs/mock_reads_testing_R1.fastq.gz \
-2 tests/inputs/mock_reads_testing_R2.fastq.gz \
-r tests/StrainR2DB_testing -o tests/StrainR2_out_testing -p testing -t 1
diff <(sort tests/StrainR2_out_testing/testing.abundances) <(sort tests/expected_output/testing_comprehensive.abundances)
diff <(sort tests/StrainR2_out_testing/testing_abundance_summary.tsv) <(sort tests/expected_output/abundance_summary_comprehensive.tsv)
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[![Testing](https://github.com/kheber/StrainR2_testing/actions/workflows/testing.yml/badge.svg)](https://github.com/kheber/StrainR2_testing/actions/workflows/testing.yml)
[![StrainR2 Version](https://anaconda.org/bioconda/strainr2/badges/version.svg)](https://anaconda.org/bioconda/strainr2)
[![Downloads](https://anaconda.org/bioconda/strainr2/badges/downloads.svg)](https://anaconda.org/bioconda/strainr2)
# Background

Traditional methods for quantifying strain abundances in a microbiome, such as 16S rRNA sequencing, lack the resolution to differentiate strains and can only report abundances generalized to the species level. Shotgun metagenomic sequencing offers an alternative, but unnormalized abundances such as FPKM are biased because similar genomes receive fewer unique mappings.
Expand Down Expand Up @@ -32,11 +35,7 @@ To install the source code into a directory onto your computer, clone the source
git clone https://github.com/BisanzLab/StrainR2.git
```

Dependencies need to be installed according to versions listed at the bottom of this document. A .yml file provided in the git repository can be used to create an environment from scratch.
```
conda env create -f StrainR2/strainr2.yml
conda activate strainr2
```
Dependencies need to be installed according to versions listed at the bottom of this document.

Files can be compiled using make
```
Expand Down
2 changes: 1 addition & 1 deletion src/PreProcessR
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ ls "$outdir"/Subcontigs/ | sed -n '/\.subcontig$/p' | sed 's|^|'"$outdir"'/Subco
xargs cat > "$outdir"/BBindex/BBIndex.fasta
ls "$outdir"/excludedSubcontigs/ | sed -n '/\.subcontig$/p' | sed 's|^|'"$outdir"'/excludedSubcontigs/|' | \
xargs cat >> "$outdir"/BBindex/BBIndex.fasta
bbmap.sh ref="$outdir"/BBindex/BBIndex.fasta path="$outdir"/BBindex
bbmap.sh ref="$outdir"/BBindex/BBIndex.fasta path="$outdir"/BBindex deterministic=t averagepairdist=200

echo "PreProcessR complete"
echo "Total Run Time: $((($SECONDS - $START_TIME)/60)) min $((($SECONDS - $START_TIME)%60)) sec"
Expand Down
2 changes: 1 addition & 1 deletion src/StrainR
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ bbmap.sh\
ref="$reference"/BBindex/BBIndex.fasta \
out="$outdir"/"$prefix".sam \
rpkm="$outdir"/"$prefix".rpkm \
threads="$threads" \
threads="$threads" deterministic=t averagepairdist=200 \
-Xmx"$mem"g \
perfectmode=t local=f ambiguous=toss pairedonly=t nodisk=t

Expand Down
15 changes: 6 additions & 9 deletions src/hashcounter.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ hashtable* hashtable_create(uint32_t kmer_size, bool is_small, uint32_t num_subc
ht->subcontig_names = calloc(num_subconts,sizeof(char*));
ht->subcontig_counts = calloc(num_subconts,sizeof(int));
ht->num_subcontigs = num_subconts;
ht->curr_subcontig = 0;
ht->size = INITIAL_HT_SIZE;
ht->count = 0;
ht->entry_bitmask = INITIAL_HT_BITMASK;
Expand Down Expand Up @@ -338,12 +339,11 @@ void hash_and_insert(hashtable* ht, char* dir_location, void (*kmer_func)(hashta
struct dirent *de;
DIR *dr = opendir(dir_location);
if(dr == NULL) {
fprintf(stderr, "Could not open excluded subcontigs directory\n\n");
fprintf(stderr, "Could not open subcontigs directory\n\n");
exit(EXIT_FAILURE);
}
char* subcont_location;
char* subcont_name;
uint32_t subcont_id;
while (((de = readdir(dr)) != NULL)) {
if(!(strlen(de->d_name) >= 10 && strcmp(&de->d_name[strlen(de->d_name) - 10], ".subcontig") == 0)) continue;
uint32_t loc_size = strlen(dir_location)+strlen(de->d_name)+1;
Expand All @@ -357,10 +357,6 @@ void hash_and_insert(hashtable* ht, char* dir_location, void (*kmer_func)(hashta
}
seq = kseq_init(fp);
kseq_read(seq);
memcpy(subcont_location, de->d_name, strlen(de->d_name)+1);
strtok(subcont_location, ".");
strtok(subcont_location, "_");
subcont_id = atoi(strtok(NULL,"_"));
if(seq->comment.s != NULL){
subcont_name = calloc(strlen(seq->name.s)+strlen(seq->comment.s)+2, sizeof(char));
memcpy(subcont_name, seq->name.s, strlen(seq->name.s));
Expand All @@ -370,8 +366,9 @@ void hash_and_insert(hashtable* ht, char* dir_location, void (*kmer_func)(hashta
subcont_name = calloc(strlen(seq->name.s)+1, sizeof(char));
memcpy(subcont_name, seq->name.s, strlen(seq->name.s));
}
ht->subcontig_names[subcont_id] = subcont_name;
hash_and_insert_subcontig(ht, seq->seq.s, subcont_id, kmer_func);
ht->subcontig_names[ht->curr_subcontig] = subcont_name;
hash_and_insert_subcontig(ht, seq->seq.s, ht->curr_subcontig, kmer_func);
++ht->curr_subcontig;
free(subcont_location);
gzclose(fp);
kseq_destroy(seq);
Expand Down Expand Up @@ -463,7 +460,7 @@ int main(int argc, char **argv){
return EXIT_FAILURE;
}
fprintf(kmercontent,"SubcontigID\tStrainID\tContigID\tStart_Stop\tLength\tNunique\n");
uint32_t i=1;
uint32_t i=0;
while(ht->subcontig_names[i]!=NULL){
fprintf(kmercontent,"%s\t", ht->subcontig_names[i]);
subcontig_info = strtok(ht->subcontig_names[i], ";");
Expand Down
1 change: 1 addition & 0 deletions src/hashcounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ typedef struct hashtable{
uint64_t count;
uint32_t* subcontig_counts;
uint32_t num_subcontigs;
uint32_t curr_subcontig;
uint32_t kmer_size;
ht_element_small* items_small; // for use in memory-efficient option
bool is_small;
Expand Down
13 changes: 2 additions & 11 deletions src/subcontig.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,6 @@
"\t\t-e number\t: exclude subcontig size (minimum subcontig size) [Default = 10000]\n" \
"\t\t-h\t\t: display this message again\n"

// for testing purposes this code was compiled with:
// gcc -g -fsanitize=address -std=gnu99 -Wall -Wextra -Werror -Wno-unused-function -Wno-unused-parameter -O0 -o subcontig subcontig.c

// the provided binary was compiled using:
// gcc -o subcontig subcontig.c

int subcontigCount = 0;

// save a sequence and appropriate header information to outdir (or excludedSubcontigs if it is less than minSubcontigSize)
void saveSubcontig(char *outdir, char *subcontigName, char *strainID, char *subcontigSeq, int start, int length, char *overlap);
// subcontig a genome and write the sequences to outdir
Expand Down Expand Up @@ -331,7 +323,6 @@ void writeSubcontigs(char *outdir, char *excludeDir, char *genomeLocation, char
// (called from by writeSubcontigs)
void saveSubcontig(char *outdir, char *subcontigName, char *strainID, char *subcontigSeq, int start, int length, char *overlap) {
FILE *fptr = NULL;
++subcontigCount;
char *seq = NULL;
int overlapLen = 0;

Expand All @@ -349,9 +340,9 @@ void saveSubcontig(char *outdir, char *subcontigName, char *strainID, char *subc
char *savedSubcontigName = calloc(needed, sizeof(char));
sprintf(savedSubcontigName, ">%s;%s;%d_%d;%d", strainID, subcontigName, start - overlapLen, start + length - 1, length + overlapLen);

needed = snprintf(NULL, 0, "%ssubcontig_%d.subcontig", outdir, subcontigCount) + 1;
needed = snprintf(NULL, 0, "%s/%s_%d_%d.subcontig", outdir, strainID, start - overlapLen, start + length - 1) + 1;
char *subcontigLocation = calloc(needed, sizeof(char));
sprintf(subcontigLocation, "%ssubcontig_%d.subcontig", outdir, subcontigCount);
sprintf(subcontigLocation, "%s/%s_%d_%d.subcontig", outdir, strainID, start - overlapLen, start + length - 1);

fptr = fopen(subcontigLocation, "w");
if (fptr == NULL) {
Expand Down
2 changes: 0 additions & 2 deletions strainr2.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
name: strainr2
channels:
- bioconda
- conda-forge
Expand Down Expand Up @@ -198,7 +197,6 @@ dependencies:
- readline=8.2
- samtools=1.20
- sed=4.8
- strainr2=2.0.0
- sysroot_linux-64=2.12
- tk=8.6.13
- tktable=2.10
Expand Down
Loading

0 comments on commit 3bd3997

Please sign in to comment.