% references.bib -- bibliography database
% Review: lessons from human GWAS applied to microbial GWAS.
% The acronym is braced so sentence-casing styles do not lowercase it.
@article{power2017microbial,
  author    = {Power, Robert A and Parkhill, Julian and De Oliveira, Tulio},
  title     = {Microbial genome-wide association studies: lessons from human {GWAS}},
  journal   = {Nature Reviews Genetics},
  volume    = {18},
  number    = {1},
  pages     = {41--50},
  year      = {2017},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}
% GWAS methods primer. `pages` holds the article number.
% Publisher name and city are split into separate fields.
@article{uffelmann2021genome,
  author    = {Uffelmann, Emil and Huang, Qin Qin and Munung, Nchangwi Syntia and De Vries, Jantina and Okada, Yukinori and Martin, Alicia R and Martin, Hilary C and Lappalainen, Tuuli and Posthuma, Danielle},
  title     = {Genome-wide association studies},
  journal   = {Nature Reviews Methods Primers},
  volume    = {1},
  number    = {1},
  pages     = {59},
  year      = {2021},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}
% Review of bioinformatics approaches to antimicrobial-resistance mechanisms.
% Journal name is stored with its proper capitalisation (styles do not recase it).
@article{van2020bioinformatics,
  author    = {Van Camp, Pieter-Jan and Haslam, David B and Porollo, Aleksey},
  title     = {Bioinformatics approaches to the understanding of molecular mechanisms in antimicrobial resistance},
  journal   = {International Journal of Molecular Sciences},
  volume    = {21},
  number    = {4},
  pages     = {1363},
  year      = {2020},
  publisher = {MDPI},
}
% AMR prediction from whole-genome sequencing with machine learning.
@article{ren2022prediction,
  author    = {Ren, Yunxiao and Chakraborty, Trinad and Doijad, Swapnil and Falgenhauer, Linda and Falgenhauer, Jane and Goesmann, Alexander and Hauschild, Anne-Christin and Schwengers, Oliver and Heider, Dominik},
  title     = {Prediction of antimicrobial resistance based on whole-genome sequencing and machine learning},
  journal   = {Bioinformatics},
  year      = {2022},
  volume    = {38},
  number    = {2},
  pages     = {325--334},
  publisher = {Oxford University Press},
}
% Random-subspace ensembles for AMR determinants across pan-genomes.
% `pages` holds the PLoS e-locator; publisher and address are separate fields.
@article{hyun2020machine,
  author    = {Hyun, Jason C and Kavvas, Erol S and Monk, Jonathan M and Palsson, Bernhard O},
  title     = {Machine learning with random subspace ensembles identifies antimicrobial resistance determinants from pan-genomes of three pathogens},
  journal   = {PLoS Computational Biology},
  volume    = {16},
  number    = {3},
  pages     = {e1007608},
  year      = {2020},
  publisher = {Public Library of Science},
  address   = {San Francisco, CA, USA},
}
% PATRIC
% PATRIC database paper. The acronym is braced against sentence-casing.
@article{wattam2014patric,
  author    = {Wattam, Alice R and Abraham, David and Dalay, Oral and Disz, Terry L and Driscoll, Timothy and Gabbard, Joseph L and Gillespie, Joseph J and Gough, Roger and Hix, Deborah and Kenyon, Ronald and others},
  title     = {{PATRIC}, the bacterial bioinformatics database and analysis resource},
  journal   = {Nucleic Acids Research},
  volume    = {42},
  number    = {D1},
  pages     = {D581--D591},
  year      = {2014},
  publisher = {Oxford University Press},
}
% CARD
% CARD 2023 database paper. Acronym and database name are braced against sentence-casing.
@article{alcock2023card,
  author    = {Alcock, Brian P and Huynh, William and Chalil, Romeo and Smith, Keaton W and Raphenya, Amogelang R and Wlodarski, Mateusz A and Edalatmand, Arman and Petkau, Aaron and Syed, Sohaib A and Tsang, Kara K and others},
  title     = {{CARD} 2023: expanded curation, support for machine learning, and resistome prediction at the {Comprehensive Antibiotic Resistance Database}},
  journal   = {Nucleic Acids Research},
  volume    = {51},
  number    = {D1},
  pages     = {D690--D699},
  year      = {2023},
  publisher = {Oxford University Press},
}
% Book chapter describing scikit-learn, so typed as incollection with the
% book title in `booktitle`. The author was listed twice in the export.
% NOTE(review): this cites Kramer's chapter, not the original scikit-learn
% paper -- confirm that is the intended reference.
@incollection{kramer2016scikit,
  author    = {Kramer, Oliver},
  title     = {{Scikit-learn}},
  booktitle = {Machine Learning for Evolution Strategies},
  pages     = {45--53},
  year      = {2016},
  publisher = {Springer},
}
% V-measure cluster-evaluation paper (EMNLP-CoNLL 2007).
% Typed as inproceedings with its venue; the title is completed ("... measure")
% and authors are in published order. The citation key is kept for existing cites.
@inproceedings{hirschberg2007v,
  author    = {Rosenberg, Andrew and Hirschberg, Julia},
  title     = {{V-Measure}: a conditional entropy-based external cluster evaluation measure},
  booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning ({EMNLP-CoNLL})},
  pages     = {410--420},
  year      = {2007},
}
% Book on interpretable machine learning. Publisher name had a stray space.
@book{molnar2020interpretable,
  author    = {Molnar, Christoph},
  title     = {Interpretable machine learning},
  year      = {2020},
  publisher = {Lulu.com},
}
% statsmodels reference paper from the SciPy 2010 conference proceedings.
% Typed as inproceedings; the exporter's placeholder journal/volume/pages are
% replaced by the proceedings title, page range and DOI.
@inproceedings{seabold2010statsmodels,
  author    = {Seabold, Skipper and Perktold, Josef},
  title     = {Statsmodels: econometric and statistical modeling with {Python}},
  booktitle = {Proceedings of the 9th {Python} in Science Conference},
  pages     = {92--96},
  year      = {2010},
  doi       = {10.25080/Majora-92bf1922-011},
}
% Gephi software paper. Tool name and acronym are braced against sentence-casing.
@inproceedings{bastian2009gephi,
  author    = {Bastian, Mathieu and Heymann, Sebastien and Jacomy, Mathieu},
  title     = {{Gephi}: an open source software for exploring and manipulating networks},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
  volume    = {3},
  number    = {1},
  pages     = {361--362},
  year      = {2009},
}
% NetworkX reference. The third author's surname was split by the exporter
% ("S Chult"); it is Schult. Institution and its location are separate fields.
@techreport{hagberg2008exploring,
  author      = {Hagberg, Aric and Swart, Pieter and Schult, Daniel A.},
  title       = {Exploring network structure, dynamics, and function using {NetworkX}},
  year        = {2008},
  institution = {Los Alamos National Laboratory (LANL)},
  address     = {Los Alamos, NM, USA},
}
% CD-HIT
% CD-HIT (2012, accelerated version). Tool name is braced against sentence-casing.
@article{fu2012cd,
  author    = {Fu, Limin and Niu, Beifang and Zhu, Zhengwei and Wu, Sitao and Li, Weizhong},
  title     = {{CD-HIT}: accelerated for clustering the next-generation sequencing data},
  journal   = {Bioinformatics},
  volume    = {28},
  number    = {23},
  pages     = {3150--3152},
  year      = {2012},
  publisher = {Oxford University Press},
}
% Cd-hit (2006). Tool name is braced against sentence-casing.
@article{li2006cd,
  author    = {Li, Weizhong and Godzik, Adam},
  title     = {{Cd-hit}: a fast program for clustering and comparing large sets of protein or nucleotide sequences},
  journal   = {Bioinformatics},
  volume    = {22},
  number    = {13},
  pages     = {1658--1659},
  year      = {2006},
  publisher = {Oxford University Press},
}
% Network Science textbook. Author is in comma form with accents restored;
% the previously empty isbn field is filled in.
% NOTE(review): verify the ISBN against your copy.
@book{netSciBible,
  author    = {Barab{\'a}si, Albert-L{\'a}szl{\'o}},
  title     = {Network Science},
  publisher = {Cambridge University Press},
  year      = {2016},
  isbn      = {978-1-107-07626-6},
}
% Review of machine learning in genetics and genomics.
% Publisher name and city are split into separate fields.
@article{libbrecht2015machine,
  author    = {Libbrecht, Maxwell W and Noble, William Stafford},
  title     = {Machine learning applications in genetics and genomics},
  journal   = {Nature Reviews Genetics},
  volume    = {16},
  number    = {6},
  pages     = {321--332},
  year      = {2015},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}
% Epistatic loci for antibiotic resistance in N. gonorrhoeae.
% Genus name is braced so sentence-casing styles keep its capital.
@article{schubert2019genome,
  author    = {Schubert, Benjamin and Maddamsetti, Rohan and Nyman, Jackson and Farhat, Maha R and Marks, Debora S},
  title     = {Genome-wide discovery of epistatic loci affecting antibiotic resistance in {Neisseria} gonorrhoeae using evolutionary couplings},
  journal   = {Nature Microbiology},
  volume    = {4},
  number    = {2},
  pages     = {328--338},
  year      = {2019},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}
% Systematic review of GWAS approaches for antibiotic resistance.
% The acronym is braced against sentence-casing; `pages` is the article number.
@article{mosquera2023genome,
  author    = {Mosquera-Rend{\'o}n, Jeanneth and Moreno-Herrera, Claudia Ximena and Robledo, Jaime and Hurtado-P{\'a}ez, Uriel},
  title     = {Genome-Wide Association Studies ({GWAS}) Approaches for the Detection of Genetic Variants Associated with Antibiotic Resistance: A Systematic Review},
  journal   = {Microorganisms},
  volume    = {11},
  number    = {12},
  pages     = {2866},
  year      = {2023},
  publisher = {MDPI},
}
% Genome-based prediction of antibiotic resistance (minireview).
% The exported page range "10--1128" was the DOI prefix mis-read as pages;
% the article uses an e-locator, and the DOI now has its own field.
@article{su2019genome,
  author    = {Su, Michelle and Satola, Sarah W and Read, Timothy D},
  title     = {Genome-based prediction of bacterial antibiotic resistance},
  journal   = {Journal of Clinical Microbiology},
  volume    = {57},
  number    = {3},
  pages     = {e01405-18},
  year      = {2019},
  publisher = {American Society for Microbiology},
  doi       = {10.1128/JCM.01405-18},
}
% BV-BRC resource paper. Acronyms are braced against sentence-casing.
@article{olson2023introducing,
  author    = {Olson, Robert D and Assaf, Rida and Brettin, Thomas and Conrad, Neal and Cucinell, Clark and Davis, James J and Dempsey, Donald M and Dickerman, Allan and Dietrich, Emily M and Kenyon, Ronald W and others},
  title     = {Introducing the bacterial and viral bioinformatics resource center ({BV-BRC}): a resource combining {PATRIC}, {IRD} and {ViPR}},
  journal   = {Nucleic Acids Research},
  volume    = {51},
  number    = {D1},
  pages     = {D678--D689},
  year      = {2023},
  publisher = {Oxford University Press},
}
% bioRxiv preprint. The exported "pages = 2024--01" was a date fragment from
% the preprint identifier, not a page range, so it is removed.
@article{wiatrak2024sequence,
  author    = {Wiatrak, Maciej and Weimann, Aaron and Dinan, Adam M and Brbi{\'c}, Maria and Floto, R Andres},
  title     = {Sequence-based modelling of bacterial genomes enables accurate antibiotic resistance prediction},
  journal   = {bioRxiv},
  year      = {2024},
  publisher = {Cold Spring Harbor Laboratory},
}
% WGS-based MIC prediction for N. gonorrhoeae.
% Acronyms and the genus name are braced against sentence-casing.
@article{eyre2017wgs,
  author    = {Eyre, David W and De Silva, Dilrini and Cole, Kevin and Peters, Joanna and Cole, Michelle J and Grad, Yonatan H and Demczuk, Walter and Martin, Irene and Mulvey, Michael R and Crook, Derrick W and others},
  title     = {{WGS} to predict antibiotic {MICs} for {Neisseria} gonorrhoeae},
  journal   = {Journal of Antimicrobial Chemotherapy},
  volume    = {72},
  number    = {7},
  pages     = {1937--1947},
  year      = {2017},
  publisher = {Oxford University Press},
}
% Network-medicine drug repurposing for COVID-19 (PNAS).
% The DOI is the article DOI (the export pointed at a supplementary spreadsheet),
% the issue is stored in `number`, and percent signs in the abstract are escaped.
@article{Gysi2021,
  author    = {Gysi, Deisy Morselli and Do Valle, {\'I}talo and Zitnik, Marinka and Ameli, Asher and Gan, Xiao and Varol, Onur and Ghiassian, Susan Dina and Patten, J. J. and Davey, Robert A. and Loscalzo, Joseph and Barab{\'a}si, Albert-L{\'a}szl{\'o}},
  title     = {Network medicine framework for identifying drug-repurposing opportunities for {COVID-19}},
  journal   = {Proceedings of the National Academy of Sciences of the United States of America},
  volume    = {118},
  number    = {19},
  pages     = {e2025581118},
  year      = {2021},
  month     = may,
  publisher = {National Academy of Sciences},
  doi       = {10.1073/pnas.2025581118},
  issn      = {1091-6490},
  pmid      = {33906951},
  url       = {https://www.pnas.org/doi/abs/10.1073/pnas.2025581118},
  keywords  = {Drug repurposing,Infectious diseases,Network medicine,Systems biology},
  abstract  = {The COVID-19 pandemic has highlighted the need to quickly and reliably prioritize clinically approved compounds for their potential effectiveness for severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) infections. Here, we deployed algorithms relying on artificial intelligence, network diffusion, and network proximity, tasking each of them to rank 6,340 drugs for their expected efficacy against SARS-CoV-2. To test the predictions, we used as ground truth 918 drugs experimentally screened in VeroE6 cells, as well as the list of drugs in clinical trials that capture the medical community's assessment of drugs with potential COVID-19 efficacy. We find that no single predictive algorithm offers consistently reliable outcomes across all datasets and metrics. This outcome prompted us to develop a multimodal technology that fuses the predictions of all algorithms, finding that a consensus among the different predictive methods consistently exceeds the performance of the best individual pipelines. We screened in human cells the top-ranked drugs, obtaining a 62\% success rate, in contrast to the 0.8\% hit rate of nonguided screenings. Of the six drugs that reduced viral infection, four could be directly repurposed to treat COVID-19, proposing novel treatments for COVID-19. We also found that 76 of the 77 drugs that successfully reduced viral infection do not bind the proteins targeted by SARS-CoV-2, indicating that these network drugs rely on network-based mechanisms that cannot be identified using docking-based strategies. These advances offer a methodological pathway to identify repurposable drugs for future pathogens and neglected diseases underserved by the costs and extended timeline of de novo drug development.},
}
% Review of sequencing-based AMR methods and resources.
% Journal is the bare name (the export appended year and volume:issue),
% the page range uses an en dash, and the split keyword is rejoined.
@article{Boolchandani2019,
  author    = {Boolchandani, Manish and D'Souza, Alaric W. and Dantas, Gautam},
  title     = {Sequencing-based methods and resources to study antimicrobial resistance},
  journal   = {Nature Reviews Genetics},
  volume    = {20},
  number    = {6},
  pages     = {356--370},
  year      = {2019},
  month     = mar,
  publisher = {Nature Publishing Group},
  doi       = {10.1038/s41576-019-0108-4},
  issn      = {1471-0064},
  pmid      = {30886350},
  url       = {https://www.nature.com/articles/s41576-019-0108-4},
  keywords  = {Antimicrobial resistance,Infectious diseases,Metagenomics,Microbial genetics,Next-generation sequencing},
  abstract  = {Antimicrobial resistance extracts high morbidity, mortality and economic costs yearly by rendering bacteria immune to antibiotics. Identifying and understanding antimicrobial resistance are imperative for clinical practice to treat resistant infections and for public health efforts to limit the spread of resistance. Technologies such as next-generation sequencing are expanding our abilities to detect and study antimicrobial resistance. This Review provides a detailed overview of antimicrobial resistance identification and characterization methods, from traditional antimicrobial susceptibility testing to recent deep-learning methods. We focus on sequencing-based resistance discovery and discuss tools and databases used in antimicrobial resistance studies. Next-generation sequencing has improved the identification and characterization of antimicrobial resistance. Focusing on sequence-based discovery of antibiotic resistance genes, this Review discusses computational strategies and resources for resistance gene identification in genomic and metagenomic samples, including recent deep-learning approaches.},
}
% CRyPTIC data compendium (PLoS Biology).
% Authors are in comma form with the garbled accent repaired; the publisher is
% the journal's publisher rather than the indexing service.
% NOTE(review): the author list appears truncated relative to a consortium
% paper -- confirm whether "and others" or a corporate author is wanted.
@article{Crook2022,
  author    = {Crook, Derrick W. and Peto, Timothy E. A. and Hoosdally, Sarah J. and Cruz, Ana Lu{\'i}za Gibertoni and Walker, A. Sarah and Walker, Timothy M. and Fowler, Philip W. and Iqbal, Zamin and Cirillo, Daniela Maria and Brankin, Alice and Malone, Kerri M. and Hunt, Martin and Knaggs, Jeff and Mistry, Nerges and Rodrigues, Camilla and Moore, David and Ismail, Nazir Ahmed and Niemann, Stefan and Roohi, Aysha and Letcher, Brice and Rancoita, Paola M. V. and Borroni, Emanuele and Grazian, Clara},
  title     = {A data compendium associating the genomes of 12,289 {Mycobacterium} tuberculosis isolates with quantitative resistance phenotypes to 13 antibiotics},
  journal   = {PLoS Biology},
  volume    = {20},
  number    = {8},
  pages     = {e3001721},
  year      = {2022},
  month     = aug,
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pbio.3001721},
  issn      = {1545-7885},
  pmid      = {35944069},
  abstract  = {The Comprehensive Resistance Prediction for Tuberculosis: an International Consortium (CRyPTIC) presents here a data compendium of 12,289 Mycobacterium tuberculosis global clinical isolates, all of which have undergone whole-genome sequencing and have had their minimum inhibitory concentrations to 13 antitubercular drugs measured in a single assay. It is the largest matched phenotypic and genotypic dataset for M. tuberculosis to date. Here, we provide a summary detailing the breadth of data collected, along with a description of how the isolates were selected, collected, and uniformly processed in CRyPTIC partner laboratories across 23 countries. The compendium contains 6,814 isolates resistant to at least 1 drug, including 2,129 samples that fully satisfy the clinical definitions of rifampicin resistant (RR), multidrug resistant (MDR), pre-extensively drug resistant (pre-XDR), or extensively drug resistant (XDR). The data are enriched for rare resistance-associated variants, and the current limits of genotypic prediction of resistance status (sensitive/resistant) are presented by using a genetic mutation catalogue, along with the presence of suspected resistance-conferring mutations for isolates resistant to the newly introduced drugs bedaquiline, clofazimine, delamanid, and linezolid. Finally, a case study of rifampicin monoresistance demonstrates how this compendium could be used to advance our genetic understanding of rare resistance phenotypes. The data compendium is fully open source and it is hoped that it will facilitate and inspire future research for years to come.},
}
% Distill article introducing graph neural networks.
% The entry had no citation key, so it could not be cited; one is assigned in
% the file's AuthorYear scheme. The exported abstract described an unrelated
% character-level machine-translation paper and is dropped.
@article{SanchezLengeling2021,
  author    = {Sanchez-Lengeling, Benjamin and Reif, Emily and Pearce, Adam and Wiltschko, Alexander B.},
  title     = {A Gentle Introduction to Graph Neural Networks},
  journal   = {Distill},
  volume    = {6},
  number    = {9},
  pages     = {e33},
  year      = {2021},
  month     = sep,
  publisher = {Distill Working Group},
  doi       = {10.23915/distill.00033},
  issn      = {2476-0757},
  url       = {https://distill.pub/2021/gnn-intro},
}
% PyTorch Geometric library paper (arXiv preprint).
% Typed as misc with eprint fields, since an article entry requires a journal.
@misc{Fey2019,
  author        = {Fey, Matthias and Lenssen, Jan Eric},
  title         = {Fast Graph Representation Learning with {PyTorch Geometric}},
  year          = {2019},
  month         = mar,
  eprint        = {1903.02428},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1903.02428v3},
  abstract      = {We introduce PyTorch Geometric, a library for deep learning on irregularly structured input data such as graphs, point clouds and manifolds, built upon PyTorch. In addition to general graph data structures and processing methods, it contains a variety of recently published methods from the domains of relational learning and 3D data processing. PyTorch Geometric achieves high data throughput by leveraging sparse GPU acceleration, by providing dedicated CUDA kernels and by introducing efficient mini-batch handling for input examples of different size. In this work, we present the library in detail and perform a comprehensive comparative study of the implemented methods in homogeneous evaluation scenarios.},
}
% Minos variant adjudication tool (Genome Biology).
% The DOI is the article DOI (the export carried a figure-path suffix) and the
% url field holds a single URL.
% NOTE(review): pages and month are kept from the export; they look
% auto-generated -- confirm against the publisher record.
@article{Hunt2022,
  author    = {Hunt, Martin and Letcher, Brice and Malone, Kerri M. and Nguyen, Giang and Hall, Michael B. and Colquhoun, Rachel M. and Lima, Leandro and Schatz, Michael C. and Ramakrishnan, Srividya and Iqbal, Zamin},
  title     = {{Minos}: variant adjudication and joint genotyping of cohorts of bacterial genomes},
  journal   = {Genome Biology},
  volume    = {23},
  number    = {1},
  pages     = {1--23},
  year      = {2022},
  month     = dec,
  publisher = {BioMed Central Ltd},
  doi       = {10.1186/s13059-022-02714-x},
  issn      = {1474-760X},
  pmid      = {35791022},
  url       = {https://link.springer.com/article/10.1186/s13059-022-02714-x},
  keywords  = {Animal Genetics and Genomics,Bioinformatics,Evolutionary Biology,Human Genetics,Microbial Genetics and Genomics,Plant Genetics and Genomics},
  abstract  = {There are many short-read variant-calling tools, with different strengths and weaknesses. We present a tool, Minos, which combines outputs from arbitrary variant callers, increasing recall without loss of precision. We benchmark on 62 samples from three bacterial species and an outbreak of 385 Mycobacterium tuberculosis samples. Minos also enables joint genotyping; we demonstrate on a large (N=13k) M. tuberculosis cohort, building a map of non-synonymous SNPs and indels in a region where all such variants are assumed to cause rifampicin resistance. We quantify the correlation with phenotypic resistance and then replicate in a second cohort (N=10k).},
}
% WGS-based AMR prediction in Gram-negative bacteria (Frontiers in Microbiology).
% The entry had no citation key; one is assigned in the file's AuthorYear scheme.
% The DOI is the article DOI (the export carried a "/BIBTEX" suffix).
% NOTE(review): pages is set to the article number taken from the DOI suffix;
% the export held what looks like a manuscript id (530987) -- confirm.
@article{VanCamp2020,
  author    = {Van Camp, Pieter Jan and Haslam, David B. and Porollo, Aleksey},
  title     = {Prediction of Antimicrobial Resistance in {Gram-Negative} Bacteria From Whole-Genome Sequencing Data},
  journal   = {Frontiers in Microbiology},
  volume    = {11},
  pages     = {1013},
  year      = {2020},
  month     = may,
  publisher = {Frontiers Media S.A.},
  doi       = {10.3389/fmicb.2020.01013},
  issn      = {1664-302X},
  keywords  = {antibiotic resistance,antimicrobial resistance,genotype-phenotype relationship,machine learning,prediction,whole-genome sequencing},
  abstract  = {Background: Early detection of antimicrobial resistance in pathogens and prescription of more effective antibiotics is a fast-emerging need in clinical practice. High-throughput sequencing technology, such as whole genome sequencing (WGS), may have the capacity to rapidly guide the clinical decision-making process. The prediction of antimicrobial resistance in Gram-negative bacteria, often the cause of serious systemic infections, is more challenging as genotype-to-phenotype (drug resistance) relationship is more complex than for most Gram-positive organisms. Methods and Findings: We have used NCBI BioSample database to train and cross-validate eight XGBoost-based machine learning models to predict drug resistance to cefepime, cefotaxime, ceftriaxone, ciprofloxacin, gentamicin, levofloxacin, meropenem, and tobramycin tested in Acinetobacter baumannii, Escherichia coli, Enterobacter cloacae, Klebsiella aerogenes, and Klebsiella pneumoniae. The input is the WGS data in terms of the coverage of known antibiotic resistance genes by shotgun sequencing reads. Models demonstrate high performance and robustness to class imbalanced datasets. Conclusion: Whole genome sequencing enables the prediction of antimicrobial resistance in Gram-negative bacteria. We present a tool that provides an in silico antibiogram for eight drugs. Predictions are accompanied with a reliability index that may further facilitate the decision making process. The demo version of the tool with pre-processed samples is available at https://vancampn.shinyapps.io/wgs2amr/. The stand-alone version of the predictor is available at https://github.com/pieterjanvc/wgs2amr/.},
}
% Survey of graph neural networks (AI Open).
% Page range uses an en dash and the month uses the standard macro.
@article{Zhou2020,
  author    = {Zhou, Jie and Cui, Ganqu and Hu, Shengding and Zhang, Zhengyan and Yang, Cheng and Liu, Zhiyuan and Wang, Lifeng and Li, Changcheng and Sun, Maosong},
  title     = {Graph neural networks: A review of methods and applications},
  journal   = {AI Open},
  volume    = {1},
  pages     = {57--81},
  year      = {2020},
  month     = jan,
  publisher = {Elsevier},
  doi       = {10.1016/j.aiopen.2021.01.001},
  issn      = {2666-6510},
  keywords  = {Deep learning,Graph neural network},
  abstract  = {Lots of learning tasks require dealing with graph data which contains rich relation information among elements. Modeling physics systems, learning molecular fingerprints, predicting protein interface, and classifying diseases demand a model to learn from graph inputs. In other domains such as learning from non-structural data like texts and images, reasoning on extracted structures (like the dependency trees of sentences and the scene graphs of images) is an important research topic which also needs graph reasoning models. Graph neural networks (GNNs) are neural models that capture the dependence of graphs via message passing between the nodes of graphs. In recent years, variants of GNNs such as graph convolutional network (GCN), graph attention network (GAT), graph recurrent network (GRN) have demonstrated ground-breaking performances on many deep learning tasks. In this survey, we propose a general design pipeline for GNN models and discuss the variants of each component, systematically categorize the applications, and propose four open problems for future research.},
}
% Graph Attention Networks (ICLR 2018).
% The entry had no citation key, venue or year, and the arXiv id sat in `isbn`.
% A key is assigned, the garbled author names are repaired with LaTeX accents,
% the all-caps title is normalised, and the arXiv id moves to eprint fields.
@inproceedings{Velickovic2018,
  author        = {Veli{\v{c}}kovi{\'c}, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Li{\`o}, Pietro and Bengio, Yoshua},
  title         = {Graph Attention Networks},
  booktitle     = {International Conference on Learning Representations},
  year          = {2018},
  eprint        = {1710.10903},
  archiveprefix = {arXiv},
  abstract      = {We present graph attention networks (GATs), novel neural network architectures that operate on graph-structured data, leveraging masked self-attentional layers to address the shortcomings of prior methods based on graph convolutions or their approximations. By stacking layers in which nodes are able to attend over their neighborhoods' features, we enable (implicitly) specifying different weights to different nodes in a neighborhood, without requiring any kind of costly matrix operation (such as inversion) or depending on knowing the graph structure upfront. In this way, we address several key challenges of spectral-based graph neural networks simultaneously, and make our model readily applicable to inductive as well as transductive problems. Our GAT models have achieved or matched state-of-the-art results across four established transductive and inductive graph benchmarks: the Cora, Citeseer and Pubmed citation network datasets, as well as a protein-protein interaction dataset (wherein test graphs remain unseen during training).},
}
% AMR prediction from WGS with machine learning (Bioinformatics 38(2)).
% The entry had no citation key, journal or year; a key is assigned and the
% publication data is filled in to match the ren2022prediction entry in this
% file, which describes the same article.
% NOTE(review): this duplicates ren2022prediction -- consider citing only one.
@article{Ren2022,
  author    = {Ren, Yunxiao and Chakraborty, Trinad and Doijad, Swapnil and Falgenhauer, Linda and Falgenhauer, Jane and Goesmann, Alexander and Hauschild, Anne-Christin and Schwengers, Oliver and Heider, Dominik},
  title     = {Prediction of antimicrobial resistance based on whole-genome sequencing and machine learning},
  journal   = {Bioinformatics},
  volume    = {38},
  number    = {2},
  pages     = {325--334},
  year      = {2022},
  publisher = {Oxford University Press},
  doi       = {10.1093/bioinformatics/btab681},
  url       = {https://github.com/YunxiaoRen/ML-iAMR},
  abstract  = {Motivation: Antimicrobial resistance (AMR) is one of the biggest global problems threatening human and animal health. Rapid and accurate AMR diagnostic methods are thus very urgently needed. However, traditional antimicrobial susceptibility testing (AST) is time-consuming, low throughput and viable only for cultivable bacteria. Machine learning methods may pave the way for automated AMR prediction based on genomic data of the bacteria. However, comparing different machine learning methods for the prediction of AMR based on different encodings and whole-genome sequencing data without previously known knowledge remains to be done. Results: In this study, we evaluated logistic regression (LR), support vector machine (SVM), random forest (RF) and convolutional neural network (CNN) for the prediction of AMR for the antibiotics ciprofloxacin, cefotaxime, ceftazidime and gentamicin. We could demonstrate that these models can effectively predict AMR with label encoding, one-hot encoding and frequency matrix chaos game representation (FCGR encoding) on whole-genome sequencing data. We trained these models on a large AMR dataset and evaluated them on an independent public dataset. Generally, RFs and CNNs perform better than LR and SVM with AUCs up to 0.96. Furthermore, we were able to identify mutations that are associated with AMR for each antibiotic. Availability and implementation: Source code in data preparation and model training are provided at GitHub website (https://github.com/YunxiaoRen/ML-iAMR).},
}
% bGWAS and transcriptomics of AMR in A. baumannii (Frontiers in Public Health 8:451).
% The journal field is the bare journal name, and the site-root url is dropped
% in favour of the DOI.
% NOTE(review): the exported author list began with Maruyama and Yano, who
% appear to be the article's reviewers picked up from the PDF front matter.
% The list below drops them and moves Sahl to the end -- verify against the
% publisher record. The citation key is kept so existing citations still resolve.
@article{Maruyama2020,
  author   = {Roe, Chandler and Williamson, Charles H. D. and Vazquez, Adam J. and Kyger, Kristen and Valentine, Michael and Bowers, Jolene R. and Phillips, Paul D. and Harrison, Veronica and Driebe, Elizabeth and Engelthaler, David M. and Sahl, Jason W.},
  title    = {Bacterial Genome Wide Association Studies ({bGWAS}) and Transcriptomics Identifies Cryptic Antimicrobial Resistance Mechanisms in {Acinetobacter} baumannii},
  journal  = {Frontiers in Public Health},
  volume   = {8},
  pages    = {451},
  year     = {2020},
  doi      = {10.3389/fpubh.2020.00451},
  keywords = {AMR,acinetobacter,bioinformatics,genomics,transcriptomics},
  abstract = {Antimicrobial resistance (AMR) in the nosocomial pathogen, Acinetobacter baumannii, is becoming a serious public health threat. While some mechanisms of AMR have been reported, understanding novel mechanisms of resistance is critical for identifying emerging resistance. One of the first steps in identifying novel AMR mechanisms is performing genotype/phenotype association studies; however, performing these studies is complicated by the plastic nature of the A. baumannii pan-genome. In this study, we compared the antibiograms of 12 antimicrobials associated with multiple drug families for 84 A. baumannii isolates, many isolated in Arizona, USA. in silico screening of these genomes for known AMR mechanisms failed to identify clear correlations for most drugs. We then performed a bacterial genome wide association study (bGWAS) looking for associations between all possible 21-mers; this approach generally failed to identify mechanisms that explained the resistance phenotype. In order to decrease the genomic noise associated with population stratification, we compared four phylogenetically-related pairs of isolates with differing susceptibility profiles. RNA-Sequencing (RNA-Seq) was performed on paired isolates and differentially-expressed genes were identified. In these isolate pairs, five different potential mechanisms were identified, highlighting the difficulty of broad AMR surveillance in this species. To verify and validate differential expression, amplicon sequencing was performed. These results suggest that a diagnostic platform based on gene expression rather than genomics alone may be beneficial in certain surveillance efforts. The implementation of such advanced diagnostics coupled with increased AMR surveillance will potentially improve A. baumannii infection treatment and patient outcomes.},
}
@article{Dey2022,
  author    = {Dey, Hrituraj and Vasudevan, Karthick and Dasegowda, K. R. and Rambabu, Majji and Prashantha, C. N. and {George Priya Doss}, C.},
  title     = {An integrated gene network analysis to decode the multi-drug resistance mechanism in {Klebsiella} pneumoniae},
  journal   = {Microbial Pathogenesis},
  volume    = {173},
  pages     = {105878},
  year      = {2022},
  month     = dec,
  publisher = {Academic Press},
  issn      = {0882-4010},
  doi       = {10.1016/j.micpath.2022.105878},
  pmid      = {36372206},
  keywords  = {AMR,Cytoscape,Gene interaction network,Klebsiella pneumoniae,Multi-drug resistant (MDR),birA2,folP},
  abstract  = {Antimicrobial resistance (AMR) among microorganisms has become one of the worldwide concerns of this century and continues to challenge us. To properly understand this problem, it is essential to know the genes that cause AMR and their resistance mechanisms. Our present study focused on Klebsiella pneumoniae, which possesses AMR genes conferring resistance against multiple antibiotics. A gene interaction network of 42 functional partners was constructed and analyzed to broaden our understanding. Three closely related clusters (C1–C3) having an association with multi-drug resistance mechanisms were identified by clustering analysis. The enrichment analysis illustrated 30 genes in biological processes, 24 genes in molecular function, and 25 genes in cellular components having a significant role. The analysis of the gene interaction network revealed genes birA2, folP, pabC, folA, gyrB, glmM, gyrA, thyA\_2 had maximum no. of interactions with their functional partners viz. 26, 25, 25, 24, 23, 23, 23, 23 respectively and can be considered as hub genes. Analyzing the enriched pathways and Gene Ontologies provides insight into AMR's molecular basis. In addition, the proposed study could aid the researchers in developing new treatment options to combat multi-drug resistant K. pneumoniae.},
}
@article{Her2018,
  author   = {Her, Hsuan-Lin and Wu, Yu-Wei},
  title    = {A pan-genome-based machine learning approach for predicting antimicrobial resistance activities of the \textit{Escherichia coli} strains},
  journal  = {Bioinformatics},
  volume   = {34},
  number   = {13},
  pages    = {i89--i95},
  year     = {2018},
  month    = jul,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/bty276},
  url      = {https://academic.oup.com/bioinformatics/article/34/13/i89/5045729},
  abstract = {Motivation: Antimicrobial resistance (AMR) is becoming a huge problem in both developed and developing countries, and identifying strains resistant or susceptible to certain antibiotics is essential in fighting against antibiotic-resistant pathogens. Whole-genome sequences have been collected for different microbial strains in order to identify crucial characteristics that allow certain strains to become resistant to antibiotics; however, a global inspection of the gene content responsible for AMR activities remains to be done. Results: We propose a pan-genome-based approach to characterize antibiotic-resistant microbial strains and test this approach on the bacterial model organism Escherichia coli. By identifying core and accessory gene clusters and predicting AMR genes for the E. coli pan-genome, we not only showed that certain classes of genes are unevenly distributed between the core and accessory parts of the pan-genome but also demonstrated that only a portion of the identified AMR genes belong to the accessory genome. Application of machine learning algorithms to predict whether specific strains were resistant to antibiotic drugs yielded the best prediction accuracy for the set of AMR genes within the accessory part of the pan-genome, suggesting that these gene clusters were most crucial to AMR activities in E. coli. Selecting subsets of AMR genes for different antibiotic drugs based on a genetic algorithm (GA) achieved better prediction performances than the gene sets established in the literature, hinting that the gene sets selected by the GA may warrant further analysis in investigating more details about how E. coli fight against antibiotics.},
}
@article{AytanAktug2020,
  author    = {Aytan-Aktug, D. and Clausen, P. T. L. C. and Bortolaia, V. and Aarestrup, F. M. and Lund, O.},
  title     = {Prediction of Acquired Antimicrobial Resistance for Multiple Bacterial Species Using Neural Networks},
  journal   = {mSystems},
  volume    = {5},
  number    = {1},
  pages     = {e00774-19},
  year      = {2020},
  month     = feb,
  publisher = {American Society for Microbiology},
  issn      = {2379-5077},
  doi       = {10.1128/msystems.00774-19},
  pmid      = {31964771},
  url       = {https://journals.asm.org/doi/10.1128/msystems.00774-19},
  keywords  = {AMR,antimicrobial resistance,machine learning,neural networks},
  abstract  = {Machine learning is a proven method to predict AMR; however, the performance of any machine learning model depends on the quality of the input data. Therefore, we evaluated different methods of representing information about mutations as well as mobilizable genes, so that the information can serve as input for a robust model. We combined data from multiple bacterial species in order to develop species-independent machine learning models that can predict resistance profiles for multiple antimicrobials and species with high performance. Machine learning has proven to be a powerful method to predict antimicrobial resistance (AMR) without using prior knowledge for selected bacterial species-antimicrobial combinations. To date, only species-specific machine learning models have been developed, and to the best of our knowledge, the inclusion of information from multiple species has not been attempted. The aim of this study was to determine the feasibility of including information from multiple bacterial species to predict AMR for an individual species, since this may make it easier to train and update resistance predictions for multiple species and may lead to improved predictions. Whole-genome sequence data and susceptibility profiles from 3,528 Mycobacterium tuberculosis, 1,694 Escherichia coli, 658 Salmonella enterica, and 1,236 Staphylococcus aureus isolates were included. We developed machine learning models trained by the features of the PointFinder and ResFinder programs detected to predict binary (susceptible/resistant) AMR profiles. We tested four feature representation methods to determine the most efficient way for introducing features into the models. When training the model only on the Mycobacterium tuberculosis isolates, high prediction performances were obtained for the six AMR profiles included.
By adding information on ciprofloxacin from the additional 3,588 isolates, there was no reduction in performance for the other antimicrobials but an increased performance for ciprofloxacin AMR profile prediction for Mycobacterium tuberculosis and Escherichia coli. In conclusion, the species-independent models can predict multi-AMR profiles for multiple species without losing any robustness. IMPORTANCE Machine learning is a proven method to predict AMR; however, the performance of any machine learning model depends on the quality of the input data. Therefore, we evaluated different methods of representing information about mutations as well as mobilizable genes, so that the information can serve as input for a robust model. We combined data from multiple bacterial species in order to develop species-independent machine learning models that can predict resistance profiles for multiple antimicrobials and species with high performance.},
}
@article{Muzio2023,
  author    = {Muzio, Giulia and O'Bray, Leslie and Meng-Papaxanthos, Laetitia and Klatt, Juliane and Fischer, Krista and Borgwardt, Karsten},
  title     = {{networkGWAS}: a network-based approach to discover genetic associations},
  journal   = {Bioinformatics},
  volume    = {39},
  number    = {6},
  pages     = {btad370},
  year      = {2023},
  month     = jun,
  publisher = {Oxford University Press},
  issn      = {1367-4811},
  doi       = {10.1093/bioinformatics/btad370},
  pmid      = {37285313},
  url       = {https://dx.doi.org/10.1093/bioinformatics/btad370},
  abstract  = {Motivation: While the search for associations between genetic markers and complex traits has led to the discovery of tens of thousands of trait-related genetic variants, the vast majority of these only explain a small fraction of the observed phenotypic variation. One possible strategy to overcome this while leveraging biological prior is to aggregate the effects of several genetic markers and to test entire genes, pathways or (sub)networks of genes for association to a phenotype. The latter, network-based genome-wide association studies, in particular suffer from a vast search space and an inherent multiple testing problem. As a consequence, current approaches are either based on greedy feature selection, thereby risking that they miss relevant associations, or neglect doing a multiple testing correction, which can lead to an abundance of false positive findings. Results: To address the shortcomings of current approaches of network-based genome-wide association studies, we propose networkGWAS, a computationally efficient and statistically sound approach to network-based genome-wide association studies using mixed models and neighborhood aggregation. It allows for population structure correction and for well-calibrated P-values, which are obtained through circular and degree-preserving network permutations. networkGWAS successfully detects known associations on diverse synthetic phenotypes, as well as known and novel genes in phenotypes from Saccharomyces cerevisiae and Homo sapiens. It thereby enables the systematic combination of gene-based genome-wide association studies with biological network information.},
}
@article{Yang2022,
  author    = {Yang, Ming-Ren and Wu, Yu-Wei},
  title     = {Enhancing predictions of antimicrobial resistance of pathogens by expanding the potential resistance gene repertoire using a pan-genome-based feature selection approach},
  journal   = {BMC Bioinformatics},
  volume    = {23},
  number    = {Suppl 4},
  pages     = {131},
  year      = {2022},
  month     = apr,
  publisher = {BioMed Central Ltd},
  issn      = {1471-2105},
  doi       = {10.1186/s12859-022-04666-2},
  pmid      = {35428201},
  url       = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-022-04666-2},
  keywords  = {Antimicrobial resistance,Feature selection,Hypothetical proteins,Pan-genome,XGBoost,eXtreme gradient boosting},
  abstract  = {Background: Predicting which pathogens might exhibit antimicrobial resistance (AMR) based on genomics data is one of the promising ways to swiftly and precisely identify AMR pathogens. Currently, the most widely used genomics approach is through identifying known AMR genes from genomic information in order to predict whether a pathogen might be resistant to certain antibiotic drugs. The list of known AMR genes, however, is still far from comprehensive and may result in inaccurate AMR pathogen predictions. We thus felt the need to expand the AMR gene set and proposed a pan-genome-based feature selection method to identify potential gene sets for AMR prediction purposes. Results: By building pan-genome datasets and extracting gene presence/absence patterns from four bacterial species, each with more than 2000 strains, we showed that machine learning models built from pan-genome data can be very promising for predicting AMR pathogens. The gene set selected by the eXtreme Gradient Boosting (XGBoost) feature selection approach further improved prediction outcomes, and an incremental approach selecting subsets of XGBoost-selected features brought the machine learning model performance to the next level. Investigating selected gene sets revealed that on average about 50\% of genes had no known function and very few of them were known AMR genes, indicating the potential of the selected gene sets to expand resistance gene repertoires. Conclusions: We demonstrated that a pan-genome-based feature selection approach is suitable for building machine learning models for predicting AMR pathogens. The extracted gene sets may provide future clues to expand our knowledge of known AMR genes and provide novel hypotheses for inferring bacterial AMR mechanisms.},
}
@article{Li2020,
  author    = {Li, Xuefei and Lin, Jingxia and Hu, Yongfei and Zhou, Jiajian},
  title     = {{PARMAP}: A Pan-Genome-Based Computational Framework for Predicting Antimicrobial Resistance},
  journal   = {Frontiers in Microbiology},
  volume    = {11},
  pages     = {578795},
  year      = {2020},
  month     = oct,
  publisher = {Frontiers Media S.A.},
  issn      = {1664-302X},
  doi       = {10.3389/fmicb.2020.578795},
  keywords  = {AMR prediction,Neisseria gonorrhoeae,antibiotic resistance genes,antimicrobial resistance (AMR),machine learning (ML),pan-genome},
  abstract  = {Antimicrobial resistance (AMR) has emerged as one of the most urgent global threats to public health. Accurate detection of AMR phenotypes is critical for reducing the spread of AMR strains. Here, we developed PARMAP (Prediction of Antimicrobial Resistance by MAPping genetic alterations in pan-genome) to predict AMR phenotypes and to identify AMR-associated genetic alterations based on the pan-genome of bacteria by utilizing machine learning algorithms. When we applied PARMAP to 1,597 Neisseria gonorrhoeae strains, it successfully predicted their AMR phenotypes based on a pan-genome analysis. Furthermore, it identified 328 genetic alterations in 23 known AMR genes and discovered many new AMR-associated genetic alterations in ciprofloxacin-resistant N. gonorrhoeae, and it clearly indicated the genetic heterogeneity of AMR genes in different subtypes of resistant N. gonorrhoeae. Additionally, PARMAP performed well in predicting the AMR phenotypes of Mycobacterium tuberculosis and Escherichia coli, indicating the robustness of the PARMAP framework. In conclusion, PARMAP not only precisely predicts the AMR of a population of strains of a given species but also uses whole-genome sequencing data to prioritize candidate AMR-associated genetic alterations based on their likelihood of contributing to AMR. Thus, we believe that PARMAP will accelerate investigations into AMR mechanisms in other human pathogens.},
}
@article{Moradigaravand2018,
  author    = {Moradigaravand, Danesh and Palm, Martin and Farewell, Anne and Mustonen, Ville and Warringer, Jonas and Parts, Leopold},
  title     = {Prediction of antibiotic resistance in {Escherichia} coli from large-scale pan-genome data},
  journal   = {PLOS Computational Biology},
  volume    = {14},
  number    = {12},
  pages     = {e1006258},
  year      = {2018},
  month     = dec,
  publisher = {Public Library of Science},
  issn      = {1553-7358},
  doi       = {10.1371/journal.pcbi.1006258},
  pmid      = {30550564},
  url       = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006258},
  keywords  = {Antibiotic resistance,Antibiotics,Decision tree learning,Forecasting,Genetics,Genomics,Machine learning,Single nucleotide polymorphisms},
  abstract  = {The emergence of microbial antibiotic resistance is a global health threat. In clinical settings, the key to controlling spread of resistant strains is accurate and rapid detection. As traditional culture-based methods are time consuming, genetic approaches have recently been developed for this task. The detection of antibiotic resistance is typically made by measuring a few known determinants previously identified from genome sequencing, and thus requires the prior knowledge of its biological mechanisms. To overcome this limitation, we employed machine learning models to predict resistance to 11 compounds across four classes of antibiotics from existing and novel whole genome sequences of 1936 E. coli strains. We considered a range of methods, and examined population structure, isolation year, gene content, and polymorphism information as predictors. Gradient boosted decision trees consistently outperformed alternative models with an average accuracy of 0.91 on held-out data (range 0.81–0.97). While the best models most frequently employed gene content, an average accuracy score of 0.79 could be obtained using population structure information alone. Single nucleotide variation data were less useful, and significantly improved prediction only for two antibiotics, including ciprofloxacin. These results demonstrate that antibiotic resistance in E. coli can be accurately predicted from whole genome sequences without a priori knowledge of mechanisms, and that both genomic and epidemiological data can be informative. This paves way to integrating machine learning approaches into diagnostic tools in the clinic.},
}
@article{pei2024argnet,
  author    = {Pei, Yao and Shum, Marcus Ho-Hin and Liao, Yunshi and Leung, Vivian W and Gong, Yu-Nong and Smith, David K and Yin, Xiaole and Guan, Yi and Luo, Ruibang and Zhang, Tong and others},
  title     = {{ARGNet}: using deep neural networks for robust identification and classification of antibiotic resistance genes from sequences},
  journal   = {Microbiome},
  volume    = {12},
  number    = {1},
  pages     = {1--17},
  year      = {2024},
  publisher = {BioMed Central},
}
@article{jaillard2018fast,
  author    = {Jaillard, Magali and Lima, Leandro and Tournoud, Maud and Mah{\'e}, Pierre and Van Belkum, Alex and Lacroix, Vincent and Jacob, Laurent},
  title     = {A fast and agnostic method for bacterial genome-wide association studies: Bridging the gap between k-mers and genetic events},
  journal   = {PLOS Genetics},
  volume    = {14},
  number    = {11},
  pages     = {e1007758},
  year      = {2018},
  publisher = {Public Library of Science},
  address   = {San Francisco, CA, USA},
}
@article{brynildsrud2016rapid,
  author    = {Brynildsrud, Ola and Bohlin, Jon and Scheffer, Lonneke and Eldholm, Vegard},
  title     = {Rapid scoring of genes in microbial pan-genome-wide association studies with {Scoary}},
  journal   = {Genome Biology},
  volume    = {17},
  pages     = {1--9},
  year      = {2016},
  publisher = {Springer},
}
@article{charitou2016using,
  author    = {Charitou, Theodosia and Bryan, Kenneth and Lynn, David J},
  title     = {Using biological networks to integrate, visualize and analyze genomics data},
  journal   = {Genetics Selection Evolution},
  year      = {2016},
  volume    = {48},
  pages     = {1--12},
  publisher = {Springer},
}
@article{schlitt2007current,
  author    = {Schlitt, Thomas and Brazma, Alvis},
  title     = {Current approaches to gene regulatory network modelling},
  journal   = {BMC Bioinformatics},
  volume    = {8},
  pages     = {1--22},
  year      = {2007},
  publisher = {Springer},
}
@article{vernikos2015ten,
  author    = {Vernikos, George and Medini, Duccio and Riley, David R and Tettelin, Herv{\'e}},
  title     = {Ten years of pan-genome analyses},
  journal   = {Current Opinion in Microbiology},
  volume    = {23},
  pages     = {148--154},
  year      = {2015},
  publisher = {Elsevier},
}
@article{smoot2011cytoscape,
  author    = {Smoot, Michael E and Ono, Keiichiro and Ruscheinski, Johannes and Wang, Peng-Liang and Ideker, Trey},
  title     = {{Cytoscape} 2.8: new features for data integration and network visualization},
  journal   = {Bioinformatics},
  volume    = {27},
  number    = {3},
  pages     = {431--432},
  year      = {2011},
  publisher = {Oxford University Press},
}
@article{piper2024evolutionary,
  author    = {Piper, Kathryn R and Ikhimiukor, Odion O and Souza, Stephanie SR and Garcia-Aroca, Teddy and Andam, Cheryl P},
  title     = {Evolutionary dynamics of the accessory genomes of {Staphylococcus} aureus},
  journal   = {{mSphere}},
  pages     = {e00751-23},
  year      = {2024},
  publisher = {American Society for Microbiology},
}
@article{her2021pangenomenet,
  author    = {Her, Hsuan-Lin and Lin, Po-Ting and Wu, Yu-Wei},
  title     = {{PangenomeNet}: a pan-genome-based network reveals functional modules on antimicrobial resistome for {Escherichia} coli strains},
  journal   = {BMC Bioinformatics},
  volume    = {22},
  pages     = {1--19},
  year      = {2021},
  publisher = {Springer},
}
@article{beavan2024contingency,
  author    = {Beavan, Alan JS and Domingo-Sananes, Maria Rosa and McInerney, James O},
  title     = {Contingency, repeatability, and predictability in the evolution of a prokaryotic pangenome},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {2024},
  volume    = {121},
  number    = {1},
  pages     = {e2304934120},
  publisher = {National Acad Sciences},
}
@article{yang2022enhancing,
  author    = {Yang, Ming-Ren and Wu, Yu-Wei},
  title     = {Enhancing predictions of antimicrobial resistance of pathogens by expanding the potential resistance gene repertoire using a pan-genome-based feature selection approach},
  journal   = {BMC Bioinformatics},
  volume    = {23},
  number    = {Suppl 4},
  pages     = {131},
  year      = {2022},
  publisher = {Springer},
  doi       = {10.1186/s12859-022-04666-2},
}
@article{nadeau2001modifier,
  author    = {Nadeau, Joseph H},
  title     = {Modifier genes in mice and humans},
  journal   = {Nature Reviews Genetics},
  volume    = {2},
  number    = {3},
  pages     = {165--174},
  year      = {2001},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}
@book{James2013,
  author    = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
  title     = {An Introduction to Statistical Learning: with Applications in {R}},
  publisher = {Springer},
  year      = {2013},
  url       = {https://faculty.marshall.usc.edu/gareth-james/ISL/},
  keywords  = {general_machine_learning},
  added-at  = {2019-10-12T20:03:56.000+0200},
  timestamp = {2019-10-12T23:45:37.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2444186c86d18bddb4433c12fa126f6be/lopusz_kdd},
  interhash = {b3febabdc45a8629023cee7323dfbd86},
  intrahash = {444186c86d18bddb4433c12fa126f6be},
}
@book{horvath2011weighted,
  author    = {Horvath, Steve},
  title     = {Weighted Network Analysis: Applications in Genomics and Systems Biology},
  publisher = {Springer},
  address   = {New York},
  year      = {2011},
  isbn      = {9781441988195},
  lccn      = {2011925163},
  url       = {https://books.google.com.lb/books?id=ZCh06NgMFesC},
}
@article{ren2022prediction,
  author    = {Ren, Yunxiao and Chakraborty, Trinad and Doijad, Swapnil and Falgenhauer, Linda and Falgenhauer, Jane and Goesmann, Alexander and Hauschild, Anne-Christin and Schwengers, Oliver and Heider, Dominik},
  title     = {Prediction of antimicrobial resistance based on whole-genome sequencing and machine learning},
  journal   = {Bioinformatics},
  year      = {2022},
  volume    = {38},
  number    = {2},
  pages     = {325--334},
  publisher = {Oxford University Press},
}
@article{nsubuga2024generalizability,
  author    = {Nsubuga, Mike and Galiwango, Ronald and Jjingo, Daudi and Mboowa, Gerald},
  title     = {Generalizability of machine learning in predicting antimicrobial resistance in {E. coli}: a multi-country case study in {Africa}},
  journal   = {BMC Genomics},
  volume    = {25},
  number    = {1},
  pages     = {287},
  year      = {2024},
  publisher = {Springer},
}
@article{ren2022deep,
  author    = {Ren, Yunxiao and Chakraborty, Trinad and Doijad, Swapnil and Falgenhauer, Linda and Falgenhauer, Jane and Goesmann, Alexander and Schwengers, Oliver and Heider, Dominik},
  title     = {Deep transfer learning enables robust prediction of antimicrobial resistance for novel antibiotics},
  journal   = {Antibiotics},
  year      = {2022},
  volume    = {11},
  number    = {11},
  pages     = {1611},
  publisher = {MDPI},
}
@article{tang2022machine,
  author    = {Tang, Rui and Luo, Rui and Tang, Shiwei and Song, Haoxin and Chen, Xiujuan},
  title     = {Machine learning in predicting antimicrobial resistance: a systematic review and meta-analysis},
  journal   = {International Journal of Antimicrobial Agents},
  volume    = {60},
  number    = {5--6},
  pages     = {106684},
  year      = {2022},
  publisher = {Elsevier},
}
@inproceedings{provost2000machine,
  author    = {Provost, Foster},
  title     = {Machine learning from imbalanced data sets 101},
  booktitle = {Proceedings of the {AAAI}'2000 workshop on imbalanced data sets},
  pages     = {1--3},
  year      = {2000},
  publisher = {AAAI Press},
}
@article{valizadehaslani2020amino,
  author    = {ValizadehAslani, Taha and Zhao, Zhengqiao and Sokhansanj, Bahrad A and Rosen, Gail L},
  title     = {Amino acid K-mer feature extraction for quantitative antimicrobial resistance ({AMR}) prediction by machine learning and model interpretation for biological insights},
  journal   = {Biology},
  volume    = {9},
  number    = {11},
  pages     = {365},
  year      = {2020},
  publisher = {MDPI},
}
@article{yang2023cross,
  author    = {Yang, Ming-Ren and Wu, Yu-Wei},
  title     = {A Cross-Validated Feature Selection ({CVFS}) approach for extracting the most parsimonious feature sets and discovering potential antimicrobial resistance ({AMR}) biomarkers},
  journal   = {Computational and Structural Biotechnology Journal},
  volume    = {21},
  pages     = {769--779},
  year      = {2023},
  publisher = {Elsevier},
}
@article{shannon2001mathematical,
  author    = {Shannon, Claude Elwood},
  title     = {A mathematical theory of communication},
  journal   = {ACM SIGMOBILE Mobile Computing and Communications Review},
  volume    = {5},
  number    = {1},
  pages     = {3--55},
  year      = {2001},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
@article{peng2018metapgn,
  author    = {Peng, Ye and Tang, Shanmei and Wang, Dan and Zhong, Huanzi and Jia, Huijue and Cai, Xianghang and Zhang, Zhaoxi and Xiao, Minfeng and Yang, Huanming and Wang, Jian and others},
  title     = {{MetaPGN}: a pipeline for construction and graphical visualization of annotated pangenome networks},
  journal   = {GigaScience},
  volume    = {7},
  number    = {11},
  pages     = {giy121},
  year      = {2018},
  publisher = {Oxford University Press},
}
@article{medini2005microbial,
  author    = {Medini, Duccio and Donati, Claudio and Tettelin, Herv{\'e} and Masignani, Vega and Rappuoli, Rino},
  title     = {The microbial pan-genome},
  journal   = {Current Opinion in Genetics \& Development},
  volume    = {15},
  number    = {6},
  pages     = {589--594},
  year      = {2005},
  publisher = {Elsevier},
}
@article{zhang2005general,
  author    = {Zhang, Bin and Horvath, Steve},
  title     = {A general framework for weighted gene co-expression network analysis},
  journal   = {Statistical Applications in Genetics and Molecular Biology},
  volume    = {4},
  number    = {1},
  year      = {2005},
  publisher = {De Gruyter},
}
@article{aguilar2023tool,
  author    = {Aguilar-Vega, Cecilia and Scoglio, Caterina and Clavijo, Mar{\'\i}a J and Robbins, Rebecca and Karriker, Locke and Liu, Xin and Mart{\'\i}nez-L{\'o}pez, Beatriz},
  title     = {A tool to enhance antimicrobial stewardship using similarity networks to identify antimicrobial resistance patterns across farms},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {2931},
  year      = {2023},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}
@article{barabasi2004network,
  author    = {Barabasi, Albert-Laszlo and Oltvai, Zoltan N},
  title     = {Network biology: understanding the cell's functional organization},
  journal   = {Nature Reviews Genetics},
  volume    = {5},
  number    = {2},
  pages     = {101--113},
  year      = {2004},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}
@article{muzio2021biological,
  author    = {Muzio, Giulia and O'Bray, Leslie and Borgwardt, Karsten},
  title     = {Biological network analysis with deep learning},
  journal   = {Briefings in Bioinformatics},
  volume    = {22},
  number    = {2},
  pages     = {1515--1530},
  year      = {2021},
  publisher = {Oxford University Press},
}
@article{liu2020computational,
  author    = {Liu, Chuang and Ma, Yifang and Zhao, Jing and Nussinov, Ruth and Zhang, Yi-Cheng and Cheng, Feixiong and Zhang, Zi-Ke},
  title     = {Computational network biology: data, models, and applications},
  journal   = {Physics Reports},
  year      = {2020},
  volume    = {846},
  pages     = {1--66},
  publisher = {Elsevier},
}
@article{lees2018pyseer,
  author    = {Lees, John A and Galardini, Marco and Bentley, Stephen D and Weiser, Jeffrey N and Corander, Jukka},
  title     = {Pyseer: a comprehensive tool for microbial pangenome-wide association studies},
  journal   = {Bioinformatics},
  year      = {2018},
  volume    = {34},
  number    = {24},
  pages     = {4310--4312},
  publisher = {Oxford University Press},
}
@article{arango2018deeparg,
  author    = {Arango-Argoty, Gustavo and Garner, Emily and Pruden, Amy and Heath, Lenwood S and Vikesland, Peter and Zhang, Liqing},
  title     = {{DeepARG}: a deep learning approach for predicting antibiotic resistance genes from metagenomic data},
  journal   = {Microbiome},
  volume    = {6},
  pages     = {1--15},
  year      = {2018},
  publisher = {Springer},
}
@article{Kim2022,
  author    = {Kim, Jee In and Maguire, Finlay and Tsang, Kara K. and Gouliouris, Theodore and Peacock, Sharon J. and McAllister, Tim A. and McArthur, Andrew G. and Beiko, Robert G.},
  title     = {Machine Learning for Antimicrobial Resistance Prediction: Current Practice, Limitations, and Clinical Perspective},
  journal   = {Clinical Microbiology Reviews},
  volume    = {35},
  number    = {3},
  pages     = {e00179-21},
  year      = {2022},
  month     = sep,
  publisher = {American Society for Microbiology},
  issn      = {1098-6618},
  doi       = {10.1128/cmr.00179-21},
  pmid      = {35612324},
  keywords  = {antimicrobial resistance,machine learning},
  abstract  = {SUMMARY Antimicrobial resistance (AMR) is a global health crisis that poses a great threat to modern medicine. Effective prevention strategies are urgently required to slow the emergence and further dissemination of AMR. Given the availability of data sets encompassing hundreds or thousands of pathogen genomes, machine learning (ML) is increasingly being used to predict resistance to different antibiotics in pathogens based on gene content and genome composition. A key objective of this work is to advocate for the incorporation of ML into front-line settings but also highlight the further refinements that are necessary to safely and confidently incorporate these methods. The question of what to predict is not trivial given the existence of different quantitative and qualitative laboratory measures of AMR. ML models typically treat genes as independent predictors, with no consideration of structural and functional linkages; they also may not be accurate when new mutational variants of known AMR genes emerge. Finally, to have the technology trusted by end users in public health settings, ML models need to be transparent and explainable to ensure that the basis for prediction is clear. We strongly advocate that the next set of AMR-ML studies should focus on the refinement of these limitations to be able to bridge the gap to diagnostic implementation.},
}
@article{MosqueraRendon2023,
  author    = {Mosquera-Rend{\'o}n, Jeanneth and Moreno-Herrera, Claudia Ximena and Robledo, Jaime and Hurtado-P{\'a}ez, Uriel},
  title     = {Genome-Wide Association Studies ({GWAS}) Approaches for the Detection of Genetic Variants Associated with Antibiotic Resistance: A Systematic Review},
  journal   = {Microorganisms},
  volume    = {11},
  number    = {12},
  pages     = {2866},
  year      = {2023},
  month     = nov,
  publisher = {Multidisciplinary Digital Publishing Institute (MDPI)},
  issn      = {2076-2607},
  doi       = {10.3390/microorganisms11122866},
  url       = {https://www.mdpi.com/2076-2607/11/12/2866},
  keywords  = {antimicrobial resistance,bacteria,genetic variants,genome-wide association study},
  abstract  = {Antibiotic resistance is a significant threat to public health worldwide. Genome-wide association studies (GWAS) have emerged as a powerful tool to identify genetic variants associated with this antibiotic resistance. By analyzing large datasets of bacterial genomes, GWAS can provide valuable insights into the resistance mechanisms and facilitate the discovery of new drug targets. The present study aimed to undertake a systematic review of different GWAS approaches used for detecting genetic variants associated with antibiotic resistance. We comprehensively searched the PubMed and Scopus databases to identify relevant studies published from 2013 to February 2023. A total of 40 studies met our inclusion criteria. These studies explored a wide range of bacterial species, antibiotics, and study designs. Notably, most of the studies were centered around human pathogens such as Mycobacterium tuberculosis, Escherichia coli, Neisseria gonorrhoeae, and Staphylococcus aureus. The review seeks to explore the several GWAS approaches utilized to investigate the genetic mechanisms associated with antibiotic resistance. Furthermore, it examines the contributions of GWAS approaches in identifying resistance-associated genetic variants through binary and continuous phenotypes. Overall, GWAS holds great potential to enhance our understanding of bacterial resistance and improve strategies to combat infectious diseases.},
}
% Davis et al. 2016, Scientific Reports: AdaBoost classifiers for AMR phenotype prediction in PATRIC and RAST.
% Article number in pages is taken from the DOI suffix; percent signs in the abstract are escaped for LaTeX.
@article{Davis2016,
  author    = {Davis, James J. and Boisvert, Sébastien and Brettin, Thomas and Kenyon, Ronald W. and Mao, Chunhong and Olson, Robert and Overbeek, Ross and Santerre, John and Shukla, Maulik and Wattam, Alice R. and Will, Rebecca and Xia, Fangfang and Stevens, Rick},
  title     = {Antimicrobial Resistance Prediction in {PATRIC} and {RAST}},
  journal   = {Scientific Reports},
  volume    = {6},
  pages     = {27930},
  month     = jun,
  year      = {2016},
  publisher = {Nature Publishing Group},
  issn      = {2045-2322},
  doi       = {10.1038/srep27930},
  pmid      = {27297683},
  abstract  = {The emergence and spread of antimicrobial resistance (AMR) mechanisms in bacterial pathogens, coupled with the dwindling number of effective antibiotics, has created a global health crisis. Being able to identify the genetic mechanisms of AMR and predict the resistance phenotypes of bacterial pathogens prior to culturing could inform clinical decision-making and improve reaction time. At PATRIC (http://patricbrc.org/), we have been collecting bacterial genomes with AMR metadata for several years. In order to advance phenotype prediction and the identification of genomic regions relating to AMR, we have updated the PATRIC FTP server to enable access to genomes that are binned by their AMR phenotypes, as well as metadata including minimum inhibitory concentrations. Using this infrastructure, we custom built AdaBoost (adaptive boosting) machine learning classifiers for identifying carbapenem resistance in Acinetobacter baumannii, methicillin resistance in Staphylococcus aureus, and beta-lactam and co-trimoxazole resistance in Streptococcus pneumoniae with accuracies ranging from 88-99\%. We also did this for isoniazid, kanamycin, ofloxacin, rifampicin, and streptomycin resistance in Mycobacterium tuberculosis, achieving accuracies ranging from 71-88\%. This set of classifiers has been used to provide an initial framework for species-specific AMR phenotype and genomic feature prediction in the RAST and PATRIC annotation services.},
}
% Lv, Deng and Zhang 2021, Biosafety and Health: review of AI applications against antimicrobial resistance.
% Typed as an article so that journal, volume, number and pages are actually rendered.
@article{Lv2021,
  author    = {Lv, Ji and Deng, Senyi and Zhang, Le},
  title     = {A review of artificial intelligence applications for antimicrobial resistance},
  journal   = {Biosafety and Health},
  volume    = {3},
  number    = {1},
  pages     = {22--31},
  month     = feb,
  year      = {2021},
  publisher = {Elsevier B.V.},
  issn      = {2590-0536},
  doi       = {10.1016/j.bsheal.2020.08.003},
  keywords  = {Antimicrobial resistance,Artificial intelligence,Clinical decision support systems,Drug combinations,Whole-genome sequencing},
  abstract  = {The wide use and abuse of antibiotics could make antimicrobial resistance (AMR) an increasingly serious issue that threatens global health and imposes an enormous burden on society and the economy. To avoid the crisis of AMR, we have to fundamentally change our approach. Artificial intelligence (AI) represents a new paradigm to combat AMR. Thus, various AI approaches to this problem have sprung up, some of which may be considered successful cases of domain-specific AI applications in AMR. However, to the best of our knowledge, there is no systematic review illustrating the use of these AI-based applications for AMR. Therefore, this review briefly introduces how to employ AI technology against AMR by using the predictive AMR model, the rational use of antibiotics, antimicrobial peptides (AMPs) and antibiotic combinations, as well as future research directions.},
}
% Hyun et al. 2023, Nature Communications: pangenomics plus machine learning to identify AMR genes across twelve species.
% NOTE(review): no pages/article number was present in the source record; add it once confirmed against the publisher page.
@article{Hyun2023,
  author    = {Hyun, Jason C. and Monk, Jonathan M. and Szubin, Richard and Hefner, Ying and Palsson, Bernhard O.},
  title     = {Global pathogenomic analysis identifies known and candidate genetic antimicrobial resistance determinants in twelve species},
  journal   = {Nature Communications},
  volume    = {14},
  number    = {1},
  month     = dec,
  year      = {2023},
  publisher = {Nature Research},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-023-43549-9},
  pmid      = {38001096},
  abstract  = {Surveillance programs for managing antimicrobial resistance (AMR) have yielded thousands of genomes suited for data-driven mechanism discovery. We present a workflow integrating pangenomics, gene annotation, and machine learning to identify AMR genes at scale. When applied to 12 species, 27,155 genomes, and 69 drugs, we 1) find AMR gene transfer mostly confined within related species, with 925 genes in multiple species but just eight in multiple phylogenetic classes, 2) demonstrate that discovery-oriented support vector machines outperform contemporary methods at recovering known AMR genes, recovering 263 genes compared to 145 by Pyseer, and 3) identify 142 AMR gene candidates. Validation of two candidates in E. coli BW25113 reveals cases of conditional resistance: ΔcycA confers ciprofloxacin resistance in minimal media with D-serine, and frdD V111D confers ampicillin resistance in the presence of ampC by modifying the overlapping promoter. We expect this approach to be adaptable to other species and phenotypes.},
}
% Power, Parkhill and De Oliveira, Nature Reviews Genetics: review of microbial GWAS methodology and lessons from human GWAS.
% NOTE(review): year/month are kept from the source record (likely the online date); confirm against the print issue for volume 18.
@article{Power2016,
  author    = {Power, Robert A. and Parkhill, Julian and De Oliveira, Tulio},
  title     = {Microbial genome-wide association studies: lessons from human {GWAS}},
  journal   = {Nature Reviews Genetics},
  volume    = {18},
  number    = {1},
  pages     = {41--50},
  month     = dec,
  year      = {2016},
  publisher = {Nature Publishing Group},
  issn      = {1471-0064},
  doi       = {10.1038/nrg.2016.132},
  pmid      = {27840430},
  abstract  = {The reduced costs of sequencing have led to whole-genome sequences for a large number of microorganisms, enabling the application of microbial genome-wide association studies (GWAS). Given the successes of human GWAS in understanding disease aetiology and identifying potential drug targets, microbial GWAS are likely to further advance our understanding of infectious diseases. These advances include insights into pressing global health problems, such as antibiotic resistance and disease transmission. In this Review, we outline the methodologies of GWAS, the current state of the field of microbial GWAS, and how lessons from human GWAS can direct the future of the field.},
}
% Anusha et al. 2023, Microbial Pathogenesis: gene interaction network analysis of AMR mechanisms in Clostridium difficile.
% NOTE(review): "Udhaya Kumar" and "George Priya Doss" are entered as compound surnames; confirm against the publisher's author listing.
@article{Anusha2023,
  author    = {Anusha, M. and Tejaswini, V. and Udhaya Kumar, S. and Prashantha, C. N. and Vasudevan, Karthick and George Priya Doss, C.},
  title     = {Gene network interaction analysis to elucidate the antimicrobial resistance mechanisms in the {Clostridium} difficile},
  journal   = {Microbial Pathogenesis},
  volume    = {178},
  pages     = {106083},
  month     = may,
  year      = {2023},
  publisher = {Academic Press},
  issn      = {0882-4010},
  doi       = {10.1016/j.micpath.2023.106083},
  url       = {https://linkinghub.elsevier.com/retrieve/pii/S088240102300116X},
  keywords  = {Clostridium difficile,Clustering analysis,Functional enrichment analysis,Gene ontology,Topological parameters},
  abstract  = {Antimicrobial resistance has caused chaos worldwide due to the depiction of multidrug-resistant (MDR) infective microorganisms. A thorough examination of antimicrobial resistance (AMR) genes and associated resistant mechanisms is vital to solving this problem. Clostridium difficile (C. difficile) is an opportunistic nosocomial bacterial strain that has acquired exogenous AMR genes that confer resistance to antimicrobials such as erythromycin, azithromycin, clarithromycin, rifampicin, moxifloxacin, fluoroquinolones, vancomycin, and others. A network of interactions, including 20 AMR genes, was created and analyzed. In functional enrichment analysis, Cellular components (CC), Molecular Functions (MF), and Biological Processes (BP) were discovered to have substantial involvement. Mutations in the rpl genes, which encode ribosomal proteins, confer resistance in Gram-positive bacteria. Full erythromycin and azithromycin cross-resistance can be conferred if more than one of the abovementioned genes is present. In the enriched BP, rps genes related to transcriptional regulation and biosynthesis were found. The genes belong to the rpoB gene family, which has previously been related to rifampicin resistance. The genes rpoB, gyrA, gyrB, rpoS, rpl genes, rps genes, and Van genes are thought to be the hub genes implicated in resistance in C. difficile. As a result, new medications could be developed using these genes. Overall, our observations provide a thorough understanding of C. difficile AMR mechanisms.},
}
% Kim et al. 2020, Current Opinion in Biotechnology: review of pan-genome analysis tools and applications for pathogenic bacteria.
% Typed as an article so that journal, volume and pages are actually rendered.
@article{Kim2020,
  author    = {Kim, Yeji and Gu, Changdai and Kim, Hyun Uk and Lee, Sang Yup},
  title     = {Current status of pan-genome analysis for pathogenic bacteria},
  journal   = {Current Opinion in Biotechnology},
  volume    = {63},
  pages     = {54--62},
  month     = jun,
  year      = {2020},
  publisher = {Elsevier Ltd},
  issn      = {1879-0429},
  doi       = {10.1016/j.copbio.2019.12.001},
  pmid      = {31891864},
  abstract  = {Biological knowledge accumulated over the decades and advances in computational methods have facilitated the implementation of pan-genome analysis that aims at better understanding of genotype-phenotype associations of a specific group of organisms. Pan-genome analysis has been shown to be an effective approach to better understand a clade of pathogenic bacteria because it helps developing various and tailored therapeutic strategies on the basis of their biological similarities and differences. Here, we review recent progress in the pan-genome analysis of pathogenic bacteria. In particular, we focus on computational tools that allow streamlined pan-genome analysis. Also, various applications of pan-genome analysis including those relevant to devising strategies for the prevention and treatment of pathogenic bacteria are reviewed.},
}
% Duplicate definition of key Kim2020 removed here: it repeated the entry immediately above and caused a repeated-entry error.
% Kavvas et al. 2018, Nature Communications: machine learning and structural analysis of the M. tuberculosis pan-genome for AMR signatures.
% Publisher markup tags were stripped from the abstract so it is plain text.
@article{Kavvas2018,
  author    = {Kavvas, Erol S. and Catoiu, Edward and Mih, Nathan and Yurkovich, James T. and Seif, Yara and Dillon, Nicholas and Heckmann, David and Anand, Amitesh and Yang, Laurence and Nizet, Victor and Monk, Jonathan M. and Palsson, Bernhard O.},
  title     = {Machine learning and structural analysis of {Mycobacterium} tuberculosis pan-genome identifies genetic signatures of antibiotic resistance},
  journal   = {Nature Communications},
  volume    = {9},
  number    = {1},
  pages     = {4306},
  month     = oct,
  year      = {2018},
  publisher = {Nature Publishing Group},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-018-06634-y},
  url       = {https://www.nature.com/articles/s41467-018-06634-y},
  abstract  = {Mycobacterium tuberculosis is a serious human pathogen threat exhibiting complex evolution of antimicrobial resistance (AMR). Accordingly, the many publicly available datasets describing its AMR characteristics demand disparate data-type analyses. Here, we develop a reference strain-agnostic computational platform that uses machine learning approaches, complemented by both genetic interaction analysis and 3D structural mutation-mapping, to identify signatures of AMR evolution to 13 antibiotics. This platform is applied to 1595 sequenced strains to yield four key results. First, a pan-genome analysis shows that M. tuberculosis is highly conserved with sequenced variation concentrated in PE/PPE/PGRS genes. Second, the platform corroborates 33 genes known to confer resistance and identifies 24 new genetic signatures of AMR. Third, 97 epistatic interactions across 10 resistance classes are revealed. Fourth, detailed structural analysis of these genes yields mechanistic bases for their selection. The platform can be used to study other human pathogens.},
}
@article{Sun2021,
abstract = {<p>Over the last decade, genome-wide association studies (GWAS) have discovered thousands of genetic variants underlying complex human diseases and agriculturally important traits. These findings have been utilized to dissect the biological basis of diseases, to develop new drugs, to advance precision medicine and to boost breeding. However, the potential of GWAS is still underexploited due to methodological limitations. Many challenges have emerged, including detecting epistasis and single-nucleotide polymorphisms (SNPs) with small effects and distinguishing causal variants from other SNPs associated through linkage disequilibrium. These issues have motivated advancements in GWAS analyses in two contrasting cultures—statistical modelling and machine learning. In this review, we systematically present the basic concepts and the benefits and limitations in both methods. We further discuss recent efforts to mitigate their weaknesses. Additionally, we summarize the state-of-the-art tools for detecting the missed signals, ultrarare mutations and gene–gene interactions and for prioritizing SNPs. Our work can offer both theoretical and practical guidelines for performing GWAS analyses and for developing further new robust methods to fully exploit the potential of GWAS.</p>},