% Knyazev et al. (2022), Nature Methods 19(4): discussion of genomic and computational methods in the COVID-19 response.
% Title uses single outer braces; only the acronym is brace-protected so styles can still apply their own casing.
@article{11187,
  abstract     = {During the COVID-19 pandemic, genomics and bioinformatics have emerged as essential public health tools. The genomic data acquired using these methods have supported the global health response, facilitated the development of testing methods and allowed the timely tracking of novel SARS-CoV-2 variants. Yet the virtually unlimited potential for rapid generation and analysis of genomic data is also coupled with unique technical, scientific and organizational challenges. Here, we discuss the application of genomic and computational methods for efficient data-driven COVID-19 response, the advantages of the democratization of viral sequencing around the world and the challenges associated with viral genome data collection and processing.},
  author       = {Knyazev, Sergey and Chhugani, Karishma and Sarwal, Varuni and Ayyala, Ram and Singh, Harman and Karthikeyan, Smruthi and Deshpande, Dhrithi and Baykal, Pelin Icer and Comarova, Zoia and Lu, Angela and Porozov, Yuri and Vasylyeva, Tetyana I. and Wertheim, Joel O. and Tierney, Braden T. and Chiu, Charles Y. and Sun, Ren and Wu, Aiping and Abedalthagafi, Malak S. and Pak, Victoria M. and Nagaraj, Shivashankar H. and Smith, Adam L. and Skums, Pavel and Pasaniuc, Bogdan and Komissarov, Andrey and Mason, Christopher E. and Bortz, Eric and Lemey, Philippe and Kondrashov, Fyodor and Beerenwinkel, Niko and Lam, Tommy Tsan Yuk and Wu, Nicholas C. and Zelikovsky, Alex and Knight, Rob and Crandall, Keith A. and Mangul, Serghei},
  issn         = {1548-7105},
  journal      = {Nature Methods},
  number       = {4},
  pages        = {374--380},
  publisher    = {Springer Nature},
  title        = {Unlocking capacities of genomics for the {COVID-19} response and future pandemics},
  doi          = {10.1038/s41592-022-01444-z},
  volume       = {19},
  year         = {2022},
}

% Saona Urmeneta, Kondrashov and Khudiakova (2022), Bulletin of Mathematical Biology 84(8): fitness peaks vs. reciprocal sign epistasis.
% NOTE(review): no pages/article number is recorded for this entry -- confirm against the publisher record.
% NOTE(review): the keyword "General Biochemistry, Genetics and Molecular Biology" contains a comma and may be split by comma-separated keyword parsers.
@article{11447,
  abstract     = {Empirical essays of fitness landscapes suggest that they may be rugged, that is having multiple fitness peaks. Such fitness landscapes, those that have multiple peaks, necessarily have special local structures, called reciprocal sign epistasis (Poelwijk et al. in J Theor Biol 272:141--144, 2011). Here, we investigate the quantitative relationship between the number of fitness peaks and the number of reciprocal sign epistatic interactions. Previously, it has been shown (Poelwijk et al. in J Theor Biol 272:141--144, 2011) that pairwise reciprocal sign epistasis is a necessary but not sufficient condition for the existence of multiple peaks. Applying discrete Morse theory, which to our knowledge has never been used in this context, we extend this result by giving the minimal number of reciprocal sign epistatic interactions required to create a given number of peaks.},
  author       = {Saona Urmeneta, Raimundo J. and Kondrashov, Fyodor and Khudiakova, Kseniia},
  issn         = {1522-9602},
  journal      = {Bulletin of Mathematical Biology},
  keywords     = {Computational Theory and Mathematics, General Agricultural and Biological Sciences, Pharmacology, General Environmental Science, General Biochemistry, Genetics and Molecular Biology, General Mathematics, Immunology, General Neuroscience},
  number       = {8},
  publisher    = {Springer Nature},
  title        = {Relation between the number of peaks and the number of reciprocal sign epistatic interactions},
  doi          = {10.1007/s11538-022-01029-z},
  volume       = {84},
  year         = {2022},
}

% Gonzalez Somermeyer et al. (2022), eLife 11: fitness peaks of four orthologous fluorescent proteins and data-driven protein design.
% NOTE(review): no pages/article number is recorded for this entry -- confirm against the publisher record.
% NOTE(review): the keyword "General Biochemistry, Genetics and Molecular Biology" contains a comma and may be split by comma-separated keyword parsers.
@article{11448,
  abstract     = {Studies of protein fitness landscapes reveal biophysical constraints guiding protein evolution and empower prediction of functional proteins. However, generalisation of these findings is limited due to scarceness of systematic data on fitness landscapes of proteins with a defined evolutionary relationship. We characterized the fitness peaks of four orthologous fluorescent proteins with a broad range of sequence divergence. While two of the four studied fitness peaks were sharp, the other two were considerably flatter, being almost entirely free of epistatic interactions. Mutationally robust proteins, characterized by a flat fitness peak, were not optimal templates for machine-learning-driven protein design -- instead, predictions were more accurate for fragile proteins with epistatic landscapes. Our work paves insights for practical application of fitness landscape heterogeneity in protein engineering.},
  author       = {Gonzalez Somermeyer, Louisa and Fleiss, Aubin and Mishin, Alexander S. and Bozhanova, Nina G. and Igolkina, Anna A. and Meiler, Jens and Alaball Pujol, Maria-Elisenda and Putintseva, Ekaterina V. and Sarkisyan, Karen S. and Kondrashov, Fyodor},
  issn         = {2050-084X},
  journal      = {eLife},
  keywords     = {General Immunology and Microbiology, General Biochemistry, Genetics and Molecular Biology, General Medicine, General Neuroscience},
  publisher    = {eLife Sciences Publications},
  title        = {Heterogeneity of the {GFP} fitness landscape and data-driven protein design},
  doi          = {10.7554/elife.75842},
  volume       = {11},
  year         = {2022},
}

% Rella et al. (2021), Scientific Reports 11(1): SIR-derived model of vaccine-resistant strain emergence.
% ISSN is written in the hyphenated NNNN-NNNN form used by the other entries in this file.
% NOTE(review): no pages/article number is recorded for this entry -- confirm against the publisher record.
@article{9905,
  abstract     = {Vaccines are thought to be the best available solution for controlling the ongoing SARS-CoV-2 pandemic. However, the emergence of vaccine-resistant strains may come too rapidly for current vaccine developments to alleviate the health, economic and social consequences of the pandemic. To quantify and characterize the risk of such a scenario, we created a SIR-derived model with initial stochastic dynamics of the vaccine-resistant strain to study the probability of its emergence and establishment. Using parameters realistically resembling SARS-CoV-2 transmission, we model a wave-like pattern of the pandemic and consider the impact of the rate of vaccination and the strength of non-pharmaceutical intervention measures on the probability of emergence of a resistant strain. As expected, we found that a fast rate of vaccination decreases the probability of emergence of a resistant strain. Counterintuitively, when a relaxation of non-pharmaceutical interventions happened at a time when most individuals of the population have already been vaccinated the probability of emergence of a resistant strain was greatly increased. Consequently, we show that a period of transmission reduction close to the end of the vaccination campaign can substantially reduce the probability of resistant strain establishment. Our results suggest that policymakers and individuals should consider maintaining non-pharmaceutical interventions and transmission-reducing behaviours throughout the entire vaccination period.},
  author       = {Rella, Simon and Kulikova, Yuliya A. and Dermitzakis, Emmanouil T. and Kondrashov, Fyodor},
  issn         = {2045-2322},
  journal      = {Scientific Reports},
  number       = {1},
  publisher    = {Springer Nature},
  title        = {Rates of {SARS-CoV-2} transmission and vaccination impact the fate of vaccine-resistant strains},
  doi          = {10.1038/s41598-021-95025-3},
  volume       = {11},
  year         = {2021},
}

% Slavskii et al. (2021), European Journal of Human Genetics 29(7): limits of the normal approximation for adult height.
% ISSN is written in the hyphenated NNNN-NNNN form used by the other entries in this file.
@article{9910,
  abstract     = {Adult height inspired the first biometrical and quantitative genetic studies and is a test-case trait for understanding heritability. The studies of height led to formulation of the classical polygenic model, that has a profound influence on the way we view and analyse complex traits. An essential part of the classical model is an assumption of additivity of effects and normality of the distribution of the residuals. However, it may be expected that the normal approximation will become insufficient in bigger studies. Here, we demonstrate that when the height of hundreds of thousands of individuals is analysed, the model complexity needs to be increased to include non-additive interactions between sex, environment and genes. Alternatively, the use of log-normal approximation allowed us to still use the additive effects model. These findings are important for future genetic and methodologic studies that make use of adult height as an exemplar trait.},
  author       = {Slavskii, Sergei A. and Kuznetsov, Ivan A. and Shashkova, Tatiana I. and Bazykin, Georgii A. and Axenovich, Tatiana I. and Kondrashov, Fyodor and Aulchenko, Yurii S.},
  issn         = {1476-5438},
  journal      = {European Journal of Human Genetics},
  number       = {7},
  pages        = {1082--1091},
  publisher    = {Springer Nature},
  title        = {The limits of normal approximation for adult height},
  doi          = {10.1038/s41431-021-00836-7},
  volume       = {29},
  year         = {2021},
}

% Mitiouchkina et al. (2020), Nature Biotechnology 38: tobacco plants engineered with a fungal bioluminescence system.
% NOTE(review): no issue number is recorded for this entry -- confirm against the publisher record.
@article{7889,
  abstract     = {Autoluminescent plants engineered to express a bacterial bioluminescence gene cluster in plastids have not been widely adopted because of low light output. We engineered tobacco plants with a fungal bioluminescence system that converts caffeic acid (present in all plants) into luciferin and report self-sustained luminescence that is visible to the naked eye. Our findings could underpin development of a suite of imaging tools for plants.},
  author       = {Mitiouchkina, Tatiana and Mishin, Alexander S. and Gonzalez Somermeyer, Louisa and Markina, Nadezhda M. and Chepurnyh, Tatiana V. and Guglya, Elena B. and Karataeva, Tatiana A. and Palkina, Kseniia A. and Shakhova, Ekaterina S. and Fakhranurova, Liliia I. and Chekova, Sofia V. and Tsarkova, Aleksandra S. and Golubev, Yaroslav V. and Negrebetsky, Vadim V. and Dolgushin, Sergey A. and Shalaev, Pavel V. and Shlykov, Dmitry and Melnik, Olesya A. and Shipunova, Victoria O. and Deyev, Sergey M. and Bubyrev, Andrey I. and Pushin, Alexander S. and Choob, Vladimir V. and Dolgov, Sergey V. and Kondrashov, Fyodor and Yampolsky, Ilia V. and Sarkisyan, Karen S.},
  issn         = {1546-1696},
  journal      = {Nature Biotechnology},
  pages        = {944--946},
  publisher    = {Springer Nature},
  title        = {Plants with genetically encoded autoluminescence},
  doi          = {10.1038/s41587-020-0500-9},
  volume       = {38},
  year         = {2020},
}

% Garriga et al. (2019), Nature Biotechnology 37(12): regressive (root-to-leaf) algorithm for large multiple sequence alignments.
% Abstract: reference-marker digits that had been fused into the text during extraction are removed.
% ISSN is written in the hyphenated NNNN-NNNN form used by the other entries in this file.
@article{7181,
  abstract     = {Multiple sequence alignments (MSAs) are used for structural and evolutionary predictions, but the complexity of aligning large datasets requires the use of approximate solutions, including the progressive algorithm. Progressive MSA methods start by aligning the most similar sequences and subsequently incorporate the remaining sequences, from leaf-to-root, based on a guide-tree. Their accuracy declines substantially as the number of sequences is scaled up. We introduce a regressive algorithm that enables MSA of up to 1.4 million sequences on a standard workstation and substantially improves accuracy on datasets larger than 10,000 sequences. Our regressive algorithm works the other way around to the progressive algorithm and begins by aligning the most dissimilar sequences. It uses an efficient divide-and-conquer strategy to run third-party alignment methods in linear time, regardless of their original complexity. Our approach will enable analyses of extremely large genomic datasets such as the recently announced Earth BioGenome Project, which comprises 1.5 million eukaryotic genomes.},
  author       = {Garriga, Edgar and Di Tommaso, Paolo and Magis, Cedrik and Erb, Ionas and Mansouri, Leila and Baltzis, Athanasios and Laayouni, Hafid and Kondrashov, Fyodor and Floden, Evan and Notredame, Cedric},
  issn         = {1546-1696},
  journal      = {Nature Biotechnology},
  number       = {12},
  pages        = {1466--1470},
  publisher    = {Springer Nature},
  title        = {Large multiple sequence alignments with a root-to-leaf regressive method},
  doi          = {10.1038/s41587-019-0333-6},
  volume       = {37},
  year         = {2019},
}