Skip to content

william-swl/genogamesh

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

15 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

genogamesh

installation

You can install the development version of genogamesh like so:

devtools::install_github("william-swl/genogamesh")

parse bioinformatic data

  • parse the output of SingleR
# SingleR(test, ref) %>% parse_SingleR()
  • parse somatic hypermutation from igblast output
# parse_IgBlast_shm('igblast_out.txt')
  • parse sequences from CellRanger vdj output
# parse_CellRanger_vdjseq(df)
# parse_CellRanger_vdjseq(df, file='seq.csv')
# parse_CellRanger_vdjseq(df, file='seq.fa', fa_content='seq_orf_nt')
  • parse sequences from ANARCI vdj output
# parse_ANARCI_aaseq(df, chain='H')
# parse_ANARCI_aaseq(df, chain='L')

# keep the ab numbering
# parse_ANARCI_aaseq(df, chain='H', keep_number=TRUE)
  • parse a vcf file with the help of a reference genome and annotations. This function is still under development: it cannot process more than 3 nt substitutions in a single record (row) of the vcf file, and it cannot process indels
# vcf <- read_vcf(...)
# fa <- read_fasta(..)
# gff <- read_gff(...)
# parse_vcf(vcf, fa, gff)

shortcuts for bioinformatic pipelines

  • add SingleR celltype annotation for Seurat object
# SE <- SingleR_SE(SE, SEref)
  • run the reduction workflow on a raw Seurat object created from a read count matrix, including normalization, variable feature calling, scaling, PCA and UMAP
# SE <- reduction_SE(SE)
  • translate nucleotides into amino acids, starting from the first character
nt2aa(c("ATGAAA", "TTGCCC", "CTGTTT"))
#> [1] "MK" "LP" "LF"
  • build antigen map from sera titer data
# antigen_map(data, sera_meta, ag_meta, seed=14)

IO

read_fasta()

write_fasta()

read_vcf()

read_gff()

S4 classes in genogamesh

mutstr

  • an S4 class to manipulate mutation strings
  • support set operations
raw_mut_string <- c(
  variant1 = "T10I,D20N,Q30E,A40T,P50L,G60R",
  variant2 = "T10I,D20-,Q30E,A40T,P50L,G60R,S80R",
  variant3 = "T10A,D20G,Q30E,A40T,P50L,G60R"
)

m <- mutstr(raw_mut_string, sep = ",")

m
#> mutstr 3
#>   @ names: variant1 variant2 variant3
#>   @ sep: ,
#>   @ mstr:
#>     [1] T10I,D20N,Q30E,A40T,P50L,G60R
#>     [2] T10I,D20-,Q30E,A40T,P50L,G60R,S80R
#>     [3] T10A,D20G,Q30E,A40T,P50L,G60R
#>   @ mut:
#>     [1] T10I D20N Q30E A40T P50L G60R
#>     [2] T10I D20- Q30E A40T P50L G60R S80R
#>     [3] T10A D20G Q30E A40T P50L G60R

names(m)
#> [1] "variant1" "variant2" "variant3"

mstr(m)
#>                             variant1                             variant2 
#>      "T10I,D20N,Q30E,A40T,P50L,G60R" "T10I,D20-,Q30E,A40T,P50L,G60R,S80R" 
#>                             variant3 
#>      "T10A,D20G,Q30E,A40T,P50L,G60R"

mut(m)
#> $variant1
#> [1] "T10I" "D20N" "Q30E" "A40T" "P50L" "G60R"
#> 
#> $variant2
#> [1] "T10I" "D20-" "Q30E" "A40T" "P50L" "G60R" "S80R"
#> 
#> $variant3
#> [1] "T10A" "D20G" "Q30E" "A40T" "P50L" "G60R"

m[1:2]
#> mutstr 2
#>   @ names: variant1 variant2
#>   @ sep: ,
#>   @ mstr:
#>     [1] T10I,D20N,Q30E,A40T,P50L,G60R
#>     [2] T10I,D20-,Q30E,A40T,P50L,G60R,S80R
#>   @ mut:
#>     [1] T10I D20N Q30E A40T P50L G60R
#>     [2] T10I D20- Q30E A40T P50L G60R S80R

m[[2]]
#> [1] "T10I" "D20-" "Q30E" "A40T" "P50L" "G60R" "S80R"

intersect(m, m[1])
#> mutstr 3
#>   @ names: variant1 variant2 variant3
#>   @ sep: ,
#>   @ mstr:
#>     [1] T10I,D20N,Q30E,A40T,P50L,G60R
#>     [2] T10I,Q30E,A40T,P50L,G60R
#>     [3] Q30E,A40T,P50L,G60R
#>   @ mut:
#>     [1] T10I D20N Q30E A40T P50L G60R
#>     [2] T10I Q30E A40T P50L G60R
#>     [3] Q30E A40T P50L G60R

setdiff(m, m[1])
#> mutstr 3
#>   @ names: variant1 variant2 variant3
#>   @ sep: ,
#>   @ mstr:
#>     [1] 
#>     [2] D20-,S80R
#>     [3] T10A,D20G
#>   @ mut:
#>     [1] 
#>     [2] D20- S80R
#>     [3] T10A D20G

union(m, m[1])
#> mutstr 3
#>   @ names: variant1 variant2 variant3
#>   @ sep: ,
#>   @ mstr:
#>     [1] T10I,D20N,Q30E,A40T,P50L,G60R
#>     [2] T10I,D20-,Q30E,A40T,P50L,G60R,S80R,D20N
#>     [3] T10A,D20G,Q30E,A40T,P50L,G60R,T10I,D20N
#>   @ mut:
#>     [1] T10I D20N Q30E A40T P50L G60R
#>     [2] T10I D20- Q30E A40T P50L G60R S80R D20N
#>     [3] T10A D20G Q30E A40T P50L G60R T10I D20N

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages