% Journal article, BMC Microbiology 23(1), 2023.
% NOTE(review): no pages/article-number field is present; confirm against the DOI record.
@article{14716,
  abstract     = {Background: Antimicrobial resistance (AMR) poses a significant global health threat, and an accurate prediction of bacterial resistance patterns is critical for effective treatment and control strategies. In recent years, machine learning (ML) approaches have emerged as powerful tools for analyzing large-scale bacterial AMR data. However, ML methods often ignore evolutionary relationships among bacterial strains, which can greatly impact performance of the ML methods, especially if resistance-associated features are attempted to be detected. Genome-wide association studies (GWAS) methods like linear mixed models accounts for the evolutionary relationships in bacteria, but they uncover only highly significant variants which have already been reported in literature.

Results: In this work, we introduce a novel phylogeny-related parallelism score (PRPS), which measures whether a certain feature is correlated with the population structure of a set of samples. We demonstrate that PRPS can be used, in combination with SVM- and random forest-based models, to reduce the number of features in the analysis, while simultaneously increasing models’ performance. We applied our pipeline to publicly available AMR data from PATRIC database for Mycobacterium tuberculosis against six common antibiotics.

Conclusions: Using our pipeline, we re-discovered known resistance-associated mutations as well as new candidate mutations which can be related to resistance and not previously reported in the literature. We demonstrated that taking into account phylogenetic relationships not only improves the model performance, but also yields more biologically relevant predicted most contributing resistance markers.},
  author       = {Yurtseven, Alper and Buyanova, Sofia and Agrawal, Amay Ajaykumar and Bochkareva, Olga and Kalinina, Olga V.},
  issn         = {1471-2180},
  journal      = {BMC Microbiology},
  number       = {1},
  publisher    = {Springer Nature},
  title        = {Machine learning and phylogenetic analysis allow for predicting antibiotic resistance in {M.} tuberculosis},
  doi          = {10.1186/s12866-023-03147-7},
  volume       = {23},
  year         = {2023},
}

% Journal article, Bioinformatics 38(2):357--363, 2022.
% Mixed-case words in the title are brace-protected so styles cannot downcase them.
@article{10927,
  abstract     = {Motivation
High plasticity of bacterial genomes is provided by numerous mechanisms including horizontal gene transfer and recombination via numerous flanking repeats. Genome rearrangements such as inversions, deletions, insertions and duplications may independently occur in different strains, providing parallel adaptation or phenotypic diversity. Specifically, such rearrangements might be responsible for virulence, antibiotic resistance and antigenic variation. However, identification of such events requires laborious manual inspection and verification of phyletic pattern consistency.
Results
Here, we define the term ‘parallel rearrangements’ as events that occur independently in phylogenetically distant bacterial strains and present a formalization of the problem of parallel rearrangements calling. We implement an algorithmic solution for the identification of parallel rearrangements in bacterial populations as a tool PaReBrick. The tool takes a collection of strains represented as a sequence of oriented synteny blocks and a phylogenetic tree as input data. It identifies rearrangements, tests them for consistency with a tree, and sorts the events by their parallelism score. The tool provides diagrams of the neighbors for each block of interest, allowing the detection of horizontally transferred blocks or their extra copies and the inversions in which copied blocks are involved. We demonstrated PaReBrick’s efficiency and accuracy and showed its potential to detect genome rearrangements responsible for pathogenicity and adaptation in bacterial genomes.},
  author       = {Zabelkin, Alexey and Yakovleva, Yulia and Bochkareva, Olga and Alexeev, Nikita},
  issn         = {1460-2059},
  journal      = {Bioinformatics},
  number       = {2},
  pages        = {357--363},
  publisher    = {Oxford Academic},
  title        = {{PaReBrick}: {PArallel} {REarrangements} and {BReaks} identification toolkit},
  doi          = {10.1093/bioinformatics/btab691},
  volume       = {38},
  year         = {2022},
}

% Journal article, Scientific Reports 12, 2022.
% Dranenko and Gelfand follow the spellings used in entry 9380; Tutukina's initials are only spaced
% because the full given name does not appear elsewhere in this file.
% NOTE(review): no number/pages (article number) field is present; confirm against the DOI record.
@article{11344,
  abstract     = {Until recently, Shigella and enteroinvasive Escherichia coli were thought to be primate-restricted pathogens. The base of their pathogenicity is the type 3 secretion system (T3SS) encoded by the pINV virulence plasmid, which facilitates host cell invasion and subsequent proliferation. A large family of T3SS effectors, E3 ubiquitin-ligases encoded by the ipaH genes, have a key role in the Shigella pathogenicity through the modulation of cellular ubiquitination that degrades host proteins. However, recent genomic studies identified ipaH genes in the genomes of Escherichia marmotae, a potential marmot pathogen, and an E. coli extracted from fecal samples of bovine calves, suggesting that non-human hosts may also be infected by these strains, potentially pathogenic to humans. We performed a comparative genomic study of the functional repertoires in the ipaH gene family in Shigella and enteroinvasive Escherichia from human and predicted non-human hosts. We found that fewer than half of Shigella genomes had a complete set of ipaH genes, with frequent gene losses and duplications that were not consistent with the species tree and nomenclature. Non-human host IpaH proteins had a diverse set of substrate-binding domains and, in contrast to the Shigella proteins, two variants of the NEL C-terminal domain. Inconsistencies between strains phylogeny and composition of effectors indicate horizontal gene transfer between E. coli adapted to different hosts. These results provide a framework for understanding of ipaH-mediated host-pathogens interactions and suggest a need for a genomic study of fecal samples from diseased animals.},
  author       = {Dranenko, Natalia O. and Tutukina, M. N. and Gelfand, Mikhail S. and Kondrashov, Fyodor and Bochkareva, Olga},
  issn         = {2045-2322},
  journal      = {Scientific Reports},
  publisher    = {Springer Nature},
  title        = {Chromosome-encoded {IpaH} ubiquitin ligases indicate non-human enteroinvasive {Escherichia}},
  doi          = {10.1038/s41598-022-10827-3},
  volume       = {12},
  year         = {2022},
}

% Journal article, Frontiers in Microbiology 12, 2021.
% The title's "spp." carries the abbreviation period, matching entry 8262.
% NOTE(review): no pages/article-number field is present; confirm against the DOI record.
@article{9380,
  abstract     = {Shigella are pathogens originating within the Escherichia lineage but frequently classified as a separate genus. Shigella genomes contain numerous insertion sequences (ISs) that lead to pseudogenisation of affected genes and an increase of non-homologous recombination. Here, we study 414 genomes of E. coli and Shigella strains to assess the contribution of genomic rearrangements to Shigella evolution. We found that Shigella experienced exceptionally high rates of intragenomic rearrangements and had a decreased rate of homologous recombination compared to pathogenic and non-pathogenic E. coli. The high rearrangement rate resulted in independent disruption of syntenic regions and parallel rearrangements in different Shigella lineages. Specifically, we identified two types of chromosomally encoded E3 ubiquitin-protein ligases acquired independently by all Shigella strains that also showed a high level of sequence conservation in the promoter and further in the 5′-intergenic region. In the only available enteroinvasive E. coli (EIEC) strain, which is a pathogenic E. coli with a phenotype intermediate between Shigella and non-pathogenic E. coli, we found a rate of genome rearrangements comparable to those in other E. coli and no functional copies of the two Shigella-specific E3 ubiquitin ligases. These data indicate that the accumulation of ISs influenced many aspects of genome evolution and played an important role in the evolution of intracellular pathogens. Our research demonstrates the power of comparative genomics-based on synteny block composition and an important role of non-coding regions in the evolution of genomic islands.},
  author       = {Seferbekova, Zaira and Zabelkin, Alexey and Yakovleva, Yulia and Afasizhev, Robert and Dranenko, Natalia O. and Alexeev, Nikita and Gelfand, Mikhail S. and Bochkareva, Olga},
  issn         = {1664-302X},
  journal      = {Frontiers in Microbiology},
  publisher    = {Frontiers},
  title        = {High rates of genome rearrangements and pathogenicity of {Shigella} spp.},
  doi          = {10.3389/fmicb.2021.628622},
  volume       = {12},
  year         = {2021},
}

% Journal article, BMC Evolutionary Biology 19, 2019.
% NOTE(review): no number/pages (article number) field is present; confirm against the DOI record.
@article{8263,
  abstract     = {Background: The genus Streptococcus comprises pathogens that strongly influence the health of humans and animals. Genome sequencing of multiple Streptococcus strains demonstrated high variability in gene content and order even in closely related strains of the same species and created a newly emerged object for genomic analysis, the pan-genome. Here we analysed the genome evolution of 25 strains of Streptococcus suis, 50 strains of Streptococcus pyogenes and 28 strains of Streptococcus pneumoniae.

Results: Fractions of the pan-genome, unique, periphery, and universal genes differ in size, functional composition, the level of nucleotide substitutions, and predisposition to horizontal gene transfer and genomic rearrangements. The density of substitutions in intergenic regions appears to be correlated with selection acting on adjacent genes, implying that more conserved genes tend to have more conserved regulatory regions.
The total pan-genome of the genus is open, but only due to strain-specific genes, whereas other pan-genome fractions reach saturation. We have identified the set of genes with phylogenies inconsistent with species and non-conserved location in the chromosome; these genes are rare in at least one species and have likely experienced recent horizontal transfer between species. The strain-specific fraction is enriched with mobile elements and hypothetical proteins, but also contains a number of candidate virulence-related genes, so it may have a strong impact on adaptability and pathogenicity.
Mapping the rearrangements to the phylogenetic tree revealed large parallel inversions in all species. A parallel inversion of length 15 kB with breakpoints formed by genes encoding surface antigen proteins PhtD and PhtB in S. pneumoniae leads to replacement of gene fragments that likely indicates the action of an antigen variation mechanism.

Conclusions: Members of genus Streptococcus have a highly dynamic, open pan-genome, that potentially confers them with the ability to adapt to changing environmental conditions, i.e. antibiotic resistance or transmission between different hosts. Hence, integrated analysis of all aspects of genome evolution is important for the identification of potential pathogens and design of drugs and vaccines.},
  author       = {Shelyakin, Pavel V. and Bochkareva, Olga and Karan, Anna A. and Gelfand, Mikhail S.},
  issn         = {1471-2148},
  journal      = {BMC Evolutionary Biology},
  publisher    = {Springer Nature},
  title        = {Micro-evolution of three {Streptococcus} species: Selection, antigenic variation, and horizontal gene inflow},
  doi          = {10.1186/s12862-019-1403-6},
  volume       = {19},
  year         = {2019},
}

% Journal article, BMC Genomics 20(1), 2019. The "Additional file N of ..." entries in this file
% carry this article's title.
% ISSN is hyphenated to match the other BMC Genomics entry (8262).
% NOTE(review): no pages/article-number field is present; confirm against the DOI record.
@article{6898,
  abstract     = {Background

Chlamydia are ancient intracellular pathogens with reduced, though strikingly conserved genome. Despite their parasitic lifestyle and isolated intracellular environment, these bacteria managed to avoid accumulation of deleterious mutations leading to subsequent genome degradation characteristic for many parasitic bacteria.
Results

We report pan-genomic analysis of sixteen species from genus Chlamydia including identification and functional annotation of orthologous genes, and characterization of gene gains, losses, and rearrangements. We demonstrate the overall genome stability of these bacteria as indicated by a large fraction of common genes with conserved genomic locations. On the other hand, extreme evolvability is confined to several paralogous gene families such as polymorphic membrane proteins and phospholipase D, and likely is caused by the pressure from the host immune system.
Conclusions

This combination of a large, conserved core genome and a small, evolvable periphery likely reflect the balance between the selective pressure towards genome reduction and the need to adapt to escape from the host immunity.},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  issn         = {1471-2164},
  journal      = {BMC Genomics},
  number       = {1},
  publisher    = {BioMed Central},
  title        = {{Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.1186/s12864-019-6059-5},
  volume       = {20},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
% Author names carry the middle initials used in the sibling "Additional file" entries.
@misc{9731,
  abstract     = {OGs with putative pseudogenes by the number of affected genomes in different chlamydial species. Frameshift and nonsense mutations located less than 60 bp upstream of the gene end or present in a single genome from the corresponding OG were excluded. (CSV 31 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 11 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808772.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9783,
  abstract     = {Predicted frameshift and nonsense mutations in Chlamydial pan-genome. For the analysis of putative pseudogenes, events located less than 60 bp. away from gene end or present in a single genome from the corresponding OG were excluded. (CSV 600 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 10 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808760.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9890,
  abstract     = {Distribution of OGs with mosaic phyletic patterns across species (complete genomes only). (CSV 7 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 15 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808802.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
% "Chaplin, Andrei V." now carries the period after the initial, as in the sibling entries.
@misc{9892,
  abstract     = {Distribution of OGs with mosaic phyletic patterns across species (all genomes). (CSV 10 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 16 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808814.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
% NOTE(review): the abstract read "genesa", presumably a mangled possessive apostrophe;
% restored as a possessive here, confirm against the figshare record.
@misc{9893,
  abstract     = {Summary of peripheral genes’ phyletic patterns and tree concordance. (CSV 26 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 17 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808820.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9894,
  abstract     = {Orthologous families (OFs) derived by MCL clustering of OGs. (CSV 189 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 18 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808826.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9895,
  abstract     = {Additional information on proteins from OG1. (CSV 30 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 19 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808835.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9896,
  abstract     = {Summary of the analysed genomes. (CSV 24 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 1 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808841.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9897,
  abstract     = {Frameshift and nonsense mutations near homopolymeric tracts of OG1 genes. Only 374 genes with typical length and domain composition were considered. (CSV 6 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 20 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808850.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9898,
  abstract     = {All polyN tracts of length 5 or more nucleotides in sequences of genes from OG1. Sequences were extracted and scanned prior to automatic correction for frameshifts implemented in the RAST pipeline. (CSV 133 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 21 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808859.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9899,
  abstract     = {Summary of orthologous groups (OGs) for 227 genomes of genus Chlamydia. (CSV 362 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 2 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808865.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9900,
  abstract     = {Pan-genome statistics by species. (CSV 3 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 5 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808886.v1},
  year         = {2019},
}

% Supplementary data file (figshare DOI); its title names the article in entry 6898.
@misc{9901,
  abstract     = {Clusters of Orthologous Genes (COGs) and corresponding functional categories assigned to OGs. (CSV 117 kb)},
  author       = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.},
  publisher    = {Springer Nature},
  title        = {Additional file 9 of {Chlamydia} pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction},
  doi          = {10.6084/m9.figshare.9808907.v1},
  year         = {2019},
}

% Journal article, BMC Genomics 19, 2018.
% NOTE(review): no number/pages (article number) field is present; confirm against the DOI record.
@article{8262,
  abstract     = {Background: The genus Burkholderia consists of species that occupy remarkably diverse ecological niches. Its best known members are important pathogens, B. mallei and B. pseudomallei, which cause glanders and melioidosis, respectively. Burkholderia genomes are unusual due to their multichromosomal organization, generally comprised of 2-3 chromosomes.

Results: We performed integrated genomic analysis of 127 Burkholderia strains. The pan-genome is open with the saturation to be reached between 86,000 and 88,000 genes. The reconstructed rearrangements indicate a strong avoidance of intra-replichore inversions that is likely caused by selection against the transfer of large groups of genes between the leading and the lagging strands. Translocated genes also tend to retain their position in the leading or the lagging strand, and this selection is stronger for large syntenies. Integrated reconstruction of chromosome rearrangements in the context of strains phylogeny reveals parallel rearrangements that may indicate inversion-based phase variation and integration of new genomic islands. In particular, we detected parallel inversions in the second chromosomes of B. pseudomallei with breakpoints formed by genes encoding membrane components of multidrug resistance complex, that may be linked to a phase variation mechanism. Two genomic islands, spreading horizontally between chromosomes, were detected in the B. cepacia group.

Conclusions: This study demonstrates the power of integrated analysis of pan-genomes, chromosome rearrangements, and selection regimes. Non-random inversion patterns indicate selective pressure, inversions are particularly frequent in a recent pathogen B. mallei, and, together with periods of positive selection at other branches, may indicate adaptation to new niches. One such adaptation could be a possible phase variation mechanism in B. pseudomallei.},
  author       = {Bochkareva, Olga and Moroz, Elena V. and Davydov, Iakov I. and Gelfand, Mikhail S.},
  issn         = {1471-2164},
  journal      = {BMC Genomics},
  publisher    = {Springer Nature},
  title        = {Genome rearrangements and selection in multi-chromosome bacteria {Burkholderia} spp.},
  doi          = {10.1186/s12864-018-5245-1},
  volume       = {19},
  year         = {2018},
}

