@comment{
  Normalisations applied to the records in this part of the file:
  one field per line; ISSNs in hyphenated NNNN-NNNN form; author
  initials written as separate, period-terminated tokens so that each
  initial is parsed on its own; LaTeX special characters in typeset
  fields are escaped (for example the ampersand in publisher names).
}

@article{15179,
  abstract  = {The fungal bioluminescence pathway can be reconstituted in other organisms allowing luminescence imaging without exogenously supplied substrate. The pathway starts from hispidin biosynthesis—a step catalyzed by a large fungal polyketide synthase that requires a posttranslational modification for activity. Here, we report identification of alternative compact hispidin synthases encoded by a phylogenetically diverse group of plants. A hybrid bioluminescence pathway that combines plant and fungal genes is more compact, not dependent on availability of machinery for posttranslational modifications, and confers autonomous bioluminescence in yeast, mammalian, and plant hosts. The compact size of plant hispidin synthases enables additional modes of delivery of autoluminescence, such as delivery with viral vectors.},
  author    = {Palkina, Kseniia A. and Karataeva, Tatiana A. and Perfilov, Maxim M. and Fakhranurova, Liliia I. and Markina, Nadezhda M. and Gonzalez Somermeyer, Louisa and Garcia-Perez, Elena and Vazquez-Vilar, Marta and Rodriguez-Rodriguez, Marta and Vazquez-Vilriales, Victor and Shakhova, Ekaterina S. and Mitiouchkina, Tatiana and Belozerova, Olga A. and Kovalchuk, Sergey I. and Alekberova, Anna and Malyshevskaia, Alena K. and Bugaeva, Evgenia N. and Guglya, Elena B. and Balakireva, Anastasia and Sytov, Nikita and Bezlikhotnova, Anastasia and Boldyreva, Daria I. and Babenko, Vladislav V. and Kondrashov, Fyodor and Choob, Vladimir V. and Orzaez, Diego and Yampolsky, Ilia V. and Mishin, Alexander S. and Sarkisyan, Karen S.},
  issn      = {2375-2548},
  journal   = {Science Advances},
  number    = {10},
  publisher = {American Association for the Advancement of Science},
  title     = {{A hybrid pathway for self-sustained luminescence}},
  doi       = {10.1126/sciadv.adk1992},
  volume    = {10},
  year      = {2024},
}

@article{12758,
  abstract  = {AlphaFold changed the field of structural biology by achieving three-dimensional (3D) structure prediction from protein sequence at experimental quality. The astounding success even led to claims that the protein folding problem is “solved”. However, protein folding problem is more than just structure prediction from sequence. Presently, it is unknown if the AlphaFold-triggered revolution could help to solve other problems related to protein folding. Here we assay the ability of AlphaFold to predict the impact of single mutations on protein stability (ΔΔG) and function. To study the question we extracted the pLDDT and metrics from AlphaFold predictions before and after single mutation in a protein and correlated the predicted change with the experimentally known ΔΔG values. Additionally, we correlated the same AlphaFold pLDDT metrics with the impact of a single mutation on structure using a large scale dataset of single mutations in GFP with the experimentally assayed levels of fluorescence. We found a very weak or no correlation between AlphaFold output metrics and change of protein stability or fluorescence. Our results imply that AlphaFold may not be immediately applied to other problems or applications in protein folding.},
  author    = {Pak, Marina A. and Markhieva, Karina A. and Novikova, Mariia S. and Petrov, Dmitry S. and Vorobyev, Ilya S. and Maksimova, Ekaterina and Kondrashov, Fyodor and Ivankov, Dmitry N.},
  issn      = {1932-6203},
  journal   = {PLoS ONE},
  number    = {3},
  publisher = {Public Library of Science},
  title     = {{Using AlphaFold to predict the impact of single mutations on protein stability and function}},
  doi       = {10.1371/journal.pone.0282689},
  volume    = {18},
  year      = {2023},
}

@article{13164,
  abstract  = {Molecular compatibility between gametes is a prerequisite for successful fertilization. As long as a sperm and egg can recognize and bind each other via their surface proteins, gamete fusion may occur even between members of separate species, resulting in hybrids that can impact speciation. The egg membrane protein Bouncer confers species specificity to gamete interactions between medaka and zebrafish, preventing their cross-fertilization. Here, we leverage this specificity to uncover distinct amino acid residues and N-glycosylation patterns that differentially influence the function of medaka and zebrafish Bouncer and contribute to cross-species incompatibility. Curiously, in contrast to the specificity observed for medaka and zebrafish Bouncer, seahorse and fugu Bouncer are compatible with both zebrafish and medaka sperm, in line with the pervasive purifying selection that dominates Bouncer’s evolution. The Bouncer-sperm interaction is therefore the product of seemingly opposing evolutionary forces that, for some species, restrict fertilization to closely related fish, and for others, allow broad gamete compatibility that enables hybridization.},
  author    = {Gert, Krista R. B. and Panser, Karin and Surm, Joachim and Steinmetz, Benjamin S. and Schleiffer, Alexander and Jovine, Luca and Moran, Yehu and Kondrashov, Fyodor and Pauli, Andrea},
  issn      = {2041-1723},
  journal   = {Nature Communications},
  publisher = {Springer Nature},
  title     = {{Divergent molecular signatures in fish Bouncer proteins define cross-fertilization boundaries}},
  doi       = {10.1038/s41467-023-39317-4},
  volume    = {14},
  year      = {2023},
}

@article{13976,
  abstract  = {Conflicts and natural disasters affect entire populations of the countries involved and, in addition to the thousands of lives destroyed, have a substantial negative impact on the scientific advances these countries provide. The unprovoked invasion of Ukraine by Russia, the devastating earthquake in Turkey and Syria, and the ongoing conflicts in the Middle East are just a few examples. Millions of people have been killed or displaced, their futures uncertain. These events have resulted in extensive infrastructure collapse, with loss of electricity, transportation, and access to services. Schools, universities, and research centers have been destroyed along with decades’ worth of data, samples, and findings. Scholars in disaster areas face short- and long-term problems in terms of what they can accomplish now for obtaining grants and for employment in the long run. In our interconnected world, conflicts and disasters are no longer a local problem but have wide-ranging impacts on the entire world, both now and in the future. Here, we focus on the current and ongoing impact of war on the scientific community within Ukraine and from this draw lessons that can be applied to all affected countries where scientists at risk are facing hardship. We present and classify examples of effective and feasible mechanisms used to support researchers in countries facing hardship and discuss how these can be implemented with help from the international scientific community and what more is desperately needed. Reaching out, providing accessible training opportunities, and developing collaborations should increase inclusion and connectivity, support scientific advancements within affected communities, and expedite postwar and disaster recovery.},
  author    = {Wolfsberger, Walter and Chhugani, Karishma and Shchubelka, Khrystyna and Frolova, Alina and Salyha, Yuriy and Zlenko, Oksana and Arych, Mykhailo and Dziuba, Dmytro and Parkhomenko, Andrii and Smolanka, Volodymyr and Gümüş, Zeynep H. and Sezgin, Efe and Diaz-Lameiro, Alondra and Toth, Viktor R. and Maci, Megi and Bortz, Eric and Kondrashov, Fyodor and Morton, Patricia M. and Łabaj, Paweł P. and Romero, Veronika and Hlávka, Jakub and Mangul, Serghei and Oleksyk, Taras K.},
  issn      = {2047-217X},
  journal   = {GigaScience},
  publisher = {Oxford Academic},
  title     = {{Scientists without borders: Lessons from Ukraine}},
  doi       = {10.1093/gigascience/giad045},
  volume    = {12},
  year      = {2023},
}

@article{14716,
  abstract  = {Background: Antimicrobial resistance (AMR) poses a significant global health threat, and an accurate prediction of bacterial resistance patterns is critical for effective treatment and control strategies. In recent years, machine learning (ML) approaches have emerged as powerful tools for analyzing large-scale bacterial AMR data. However, ML methods often ignore evolutionary relationships among bacterial strains, which can greatly impact performance of the ML methods, especially if resistance-associated features are attempted to be detected. Genome-wide association studies (GWAS) methods like linear mixed models accounts for the evolutionary relationships in bacteria, but they uncover only highly significant variants which have already been reported in literature. Results: In this work, we introduce a novel phylogeny-related parallelism score (PRPS), which measures whether a certain feature is correlated with the population structure of a set of samples. We demonstrate that PRPS can be used, in combination with SVM- and random forest-based models, to reduce the number of features in the analysis, while simultaneously increasing models’ performance. We applied our pipeline to publicly available AMR data from PATRIC database for Mycobacterium tuberculosis against six common antibiotics. Conclusions: Using our pipeline, we re-discovered known resistance-associated mutations as well as new candidate mutations which can be related to resistance and not previously reported in the literature. We demonstrated that taking into account phylogenetic relationships not only improves the model performance, but also yields more biologically relevant predicted most contributing resistance markers.},
  author    = {Yurtseven, Alper and Buyanova, Sofia and Agrawal, Amay Ajaykumar and Bochkareva, Olga and Kalinina, Olga V.},
  issn      = {1471-2180},
  journal   = {BMC Microbiology},
  number    = {1},
  publisher = {Springer Nature},
  title     = {{Machine learning and phylogenetic analysis allow for predicting antibiotic resistance in M. tuberculosis}},
  doi       = {10.1186/s12866-023-03147-7},
  volume    = {23},
  year      = {2023},
}

@article{10927,
  abstract  = {Motivation: High plasticity of bacterial genomes is provided by numerous mechanisms including horizontal gene transfer and recombination via numerous flanking repeats. Genome rearrangements such as inversions, deletions, insertions and duplications may independently occur in different strains, providing parallel adaptation or phenotypic diversity. Specifically, such rearrangements might be responsible for virulence, antibiotic resistance and antigenic variation. However, identification of such events requires laborious manual inspection and verification of phyletic pattern consistency. Results: Here, we define the term ‘parallel rearrangements’ as events that occur independently in phylogenetically distant bacterial strains and present a formalization of the problem of parallel rearrangements calling. We implement an algorithmic solution for the identification of parallel rearrangements in bacterial populations as a tool PaReBrick. The tool takes a collection of strains represented as a sequence of oriented synteny blocks and a phylogenetic tree as input data. It identifies rearrangements, tests them for consistency with a tree, and sorts the events by their parallelism score. The tool provides diagrams of the neighbors for each block of interest, allowing the detection of horizontally transferred blocks or their extra copies and the inversions in which copied blocks are involved. We demonstrated PaReBrick’s efficiency and accuracy and showed its potential to detect genome rearrangements responsible for pathogenicity and adaptation in bacterial genomes.},
  author    = {Zabelkin, Alexey and Yakovleva, Yulia and Bochkareva, Olga and Alexeev, Nikita},
  issn      = {1460-2059},
  journal   = {Bioinformatics},
  number    = {2},
  pages     = {357--363},
  publisher = {Oxford Academic},
  title     = {{PaReBrick: PArallel REarrangements and BReaks identification toolkit}},
  doi       = {10.1093/bioinformatics/btab691},
  volume    = {38},
  year      = {2022},
}

@article{11187,
  abstract  = {During the COVID-19 pandemic, genomics and bioinformatics have emerged as essential public health tools. The genomic data acquired using these methods have supported the global health response, facilitated the development of testing methods and allowed the timely tracking of novel SARS-CoV-2 variants. Yet the virtually unlimited potential for rapid generation and analysis of genomic data is also coupled with unique technical, scientific and organizational challenges. Here, we discuss the application of genomic and computational methods for efficient data-driven COVID-19 response, the advantages of the democratization of viral sequencing around the world and the challenges associated with viral genome data collection and processing.},
  author    = {Knyazev, Sergey and Chhugani, Karishma and Sarwal, Varuni and Ayyala, Ram and Singh, Harman and Karthikeyan, Smruthi and Deshpande, Dhrithi and Baykal, Pelin Icer and Comarova, Zoia and Lu, Angela and Porozov, Yuri and Vasylyeva, Tetyana I. and Wertheim, Joel O. and Tierney, Braden T. and Chiu, Charles Y. and Sun, Ren and Wu, Aiping and Abedalthagafi, Malak S. and Pak, Victoria M. and Nagaraj, Shivashankar H. and Smith, Adam L. and Skums, Pavel and Pasaniuc, Bogdan and Komissarov, Andrey and Mason, Christopher E. and Bortz, Eric and Lemey, Philippe and Kondrashov, Fyodor and Beerenwinkel, Niko and Lam, Tommy Tsan Yuk and Wu, Nicholas C. and Zelikovsky, Alex and Knight, Rob and Crandall, Keith A. and Mangul, Serghei},
  issn      = {1548-7105},
  journal   = {Nature Methods},
  number    = {4},
  pages     = {374--380},
  publisher = {Springer Nature},
  title     = {{Unlocking capacities of genomics for the COVID-19 response and future pandemics}},
  doi       = {10.1038/s41592-022-01444-z},
  volume    = {19},
  year      = {2022},
}

@article{11344,
  abstract  = {Until recently, Shigella and enteroinvasive Escherichia coli were thought to be primate-restricted pathogens. The base of their pathogenicity is the type 3 secretion system (T3SS) encoded by the pINV virulence plasmid, which facilitates host cell invasion and subsequent proliferation. A large family of T3SS effectors, E3 ubiquitin-ligases encoded by the ipaH genes, have a key role in the Shigella pathogenicity through the modulation of cellular ubiquitination that degrades host proteins. However, recent genomic studies identified ipaH genes in the genomes of Escherichia marmotae, a potential marmot pathogen, and an E. coli extracted from fecal samples of bovine calves, suggesting that non-human hosts may also be infected by these strains, potentially pathogenic to humans. We performed a comparative genomic study of the functional repertoires in the ipaH gene family in Shigella and enteroinvasive Escherichia from human and predicted non-human hosts. We found that fewer than half of Shigella genomes had a complete set of ipaH genes, with frequent gene losses and duplications that were not consistent with the species tree and nomenclature. Non-human host IpaH proteins had a diverse set of substrate-binding domains and, in contrast to the Shigella proteins, two variants of the NEL C-terminal domain. Inconsistencies between strains phylogeny and composition of effectors indicate horizontal gene transfer between E. coli adapted to different hosts. These results provide a framework for understanding of ipaH-mediated host-pathogens interactions and suggest a need for a genomic study of fecal samples from diseased animals.},
  author    = {Dranenko, Natalia O. and Tutukina, M. N. and Gelfand, Mikhail S. and Kondrashov, Fyodor and Bochkareva, Olga},
  issn      = {2045-2322},
  journal   = {Scientific Reports},
  publisher = {Springer Nature},
  title     = {{Chromosome-encoded IpaH ubiquitin ligases indicate non-human enteroinvasive Escherichia}},
  doi       = {10.1038/s41598-022-10827-3},
  volume    = {12},
  year      = {2022},
}

@article{11448,
  abstract  = {Studies of protein fitness landscapes reveal biophysical constraints guiding protein evolution and empower prediction of functional proteins. However, generalisation of these findings is limited due to scarceness of systematic data on fitness landscapes of proteins with a defined evolutionary relationship. We characterized the fitness peaks of four orthologous fluorescent proteins with a broad range of sequence divergence. While two of the four studied fitness peaks were sharp, the other two were considerably flatter, being almost entirely free of epistatic interactions. Mutationally robust proteins, characterized by a flat fitness peak, were not optimal templates for machine-learning-driven protein design – instead, predictions were more accurate for fragile proteins with epistatic landscapes. Our work paves insights for practical application of fitness landscape heterogeneity in protein engineering.},
  author    = {Gonzalez Somermeyer, Louisa and Fleiss, Aubin and Mishin, Alexander S. and Bozhanova, Nina G. and Igolkina, Anna A. and Meiler, Jens and Alaball Pujol, Maria-Elisenda and Putintseva, Ekaterina V. and Sarkisyan, Karen S. and Kondrashov, Fyodor},
  issn      = {2050-084X},
  journal   = {eLife},
  keywords  = {General Immunology and Microbiology, General Biochemistry, Genetics and Molecular Biology, General Medicine, General Neuroscience},
  publisher = {eLife Sciences Publications},
  title     = {{Heterogeneity of the GFP fitness landscape and data-driven protein design}},
  doi       = {10.7554/elife.75842},
  volume    = {11},
  year      = {2022},
}

@article{11587,
  abstract  = {Background: Accurate and comprehensive annotation of transcript sequences is essential for transcript quantification and differential gene and transcript expression analysis. Single-molecule long-read sequencing technologies provide improved integrity of transcript structures including alternative splicing, and transcription start and polyadenylation sites. However, accuracy is significantly affected by sequencing errors, mRNA degradation, or incomplete cDNA synthesis. Results: We present a new and comprehensive Arabidopsis thaliana Reference Transcript Dataset 3 (AtRTD3). AtRTD3 contains over 169,000 transcripts—twice that of the best current Arabidopsis transcriptome and including over 1500 novel genes. Seventy-eight percent of transcripts are from Iso-seq with accurately defined splice junctions and transcription start and end sites. We develop novel methods to determine splice junctions and transcription start and end sites accurately. Mismatch profiles around splice junctions provide a powerful feature to distinguish correct splice junctions and remove false splice junctions. Stratified approaches identify high-confidence transcription start and end sites and remove fragmentary transcripts due to degradation. AtRTD3 is a major improvement over existing transcriptomes as demonstrated by analysis of an Arabidopsis cold response RNA-seq time-series. AtRTD3 provides higher resolution of transcript expression profiling and identifies cold-induced differential transcription start and polyadenylation site usage. Conclusions: AtRTD3 is the most comprehensive Arabidopsis transcriptome currently. It improves the precision of differential gene and transcript expression, differential alternative splicing, and transcription start/end site usage analysis from RNA-seq data. The novel methods for identifying accurate splice junctions and transcription start/end sites are widely applicable and will improve single-molecule sequencing analysis from any species.},
  author    = {Zhang, Runxuan and Kuo, Richard and Coulter, Max and Calixto, Cristiane P. G. and Entizne, Juan Carlos and Guo, Wenbin and Marquez, Yamile and Milne, Linda and Riegler, Stefan and Matsui, Akihiro and Tanaka, Maho and Harvey, Sarah and Gao, Yubang and Wießner-Kroh, Theresa and Paniagua, Alejandro and Crespi, Martin and Denby, Katherine and Hur, Asa Ben and Huq, Enamul and Jantsch, Michael and Jarmolowski, Artur and Koester, Tino and Laubinger, Sascha and Li, Qingshun Quinn and Gu, Lianfeng and Seki, Motoaki and Staiger, Dorothee and Sunkar, Ramanjulu and Szweykowska-Kulinska, Zofia and Tu, Shih Long and Wachter, Andreas and Waugh, Robbie and Xiong, Liming and Zhang, Xiao Ning and Conesa, Ana and Reddy, Anireddy S. N. and Barta, Andrea and Kalyna, Maria and Brown, John W. S.},
  issn      = {1474-760X},
  journal   = {Genome Biology},
  publisher = {BioMed Central},
  title     = {{A high-resolution single-molecule sequencing-based Arabidopsis transcriptome using novel methods of Iso-seq analysis}},
  doi       = {10.1186/s13059-022-02711-0},
  volume    = {23},
  year      = {2022},
}

@article{12131,
  abstract  = {Replication-incompetent adenoviral vectors have been extensively used as a platform for vaccine design, with at least four anti-COVID-19 vaccines authorized to date. These vaccines elicit neutralizing antibody responses directed against SARS-CoV-2 Spike protein and confer significant level of protection against SARS-CoV-2 infection. Immunization with adenovirus-vectored vaccines is known to be accompanied by the production of anti-vector antibodies, which may translate into reduced efficacy of booster or repeated rounds of revaccination. Here, we used blood samples from patients who received an adenovirus-based Gam-COVID-Vac vaccine to address the question of whether anti-vector antibodies may influence the magnitude of SARS-CoV-2-specific humoral response after booster vaccination. We observed that rAd26-based prime vaccination with Gam-COVID-Vac induced the development of Ad26-neutralizing antibodies, which persisted in circulation for at least 9 months. Our analysis further indicates that high pre-boost Ad26 neutralizing antibody titers do not appear to affect the humoral immunogenicity of the Gam-COVID-Vac boost. The titers of anti-SARS-CoV-2 RBD IgGs and antibodies, which neutralized both the wild type and the circulating variants of concern of SARS-CoV-2 such as Delta and Omicron, were independent of the pre-boost levels of Ad26-neutralizing antibodies. Thus, our results support the development of repeated immunization schedule with adenovirus-based COVID-19 vaccines.},
  author    = {Byazrova, Maria G. and Astakhova, Ekaterina A. and Minnegalieva, Aygul and Sukhova, Maria M. and Mikhailov, Artem A. and Prilipov, Alexey G. and Gorchakov, Andrey A. and Filatov, Alexander V.},
  issn      = {2059-0105},
  journal   = {npj Vaccines},
  keywords  = {Pharmacology (medical), Infectious Diseases, Pharmacology, Immunology, SARS-COV-2, COVID},
  publisher = {Springer Nature},
  title     = {{Anti-Ad26 humoral immunity does not compromise SARS-COV-2 neutralizing antibody responses following Gam-COVID-Vac booster vaccination}},
  doi       = {10.1038/s41541-022-00566-x},
  volume    = {7},
  year      = {2022},
}

@article{12173,
  abstract  = {With increasing urbanization and industrialization, the prevalence of inflammatory bowel diseases (IBDs) has steadily been rising over the past two decades. IBD involves flares of gastrointestinal (GI) inflammation accompanied by microbiota perturbations. However, microbial mechanisms that trigger such flares remain elusive. Here, we analyzed the association of the emerging pathogen atypical enteropathogenic E. coli (aEPEC) with IBD disease activity. The presence of diarrheagenic E. coli was assessed in stool samples from 630 IBD patients and 234 age- and sex-matched controls without GI symptoms. Microbiota was analyzed with 16S ribosomal RNA gene amplicon sequencing, and 57 clinical aEPEC isolates were subjected to whole-genome sequencing and in vitro pathogenicity experiments including biofilm formation, epithelial barrier function and the ability to induce pro-inflammatory signaling. The presence of aEPEC correlated with laboratory, clinical and endoscopic disease activity in ulcerative colitis (UC), as well as microbiota dysbiosis. In vitro, aEPEC strains induce epithelial p21-activated kinases, disrupt the epithelial barrier and display potent biofilm formation. The effector proteins espV and espG2 distinguish aEPEC cultured from UC and Crohn’s disease patients, respectively. EspV-positive aEPEC harbor more virulence factors and have a higher pro-inflammatory potential, which is counteracted by 5-ASA. aEPEC may tip a fragile immune–microbiota homeostasis and thereby contribute to flares in UC. aEPEC isolates from UC patients display properties to disrupt the epithelial barrier and to induce pro-inflammatory signaling in vitro.},
  author    = {Baumgartner, Maximilian and Zirnbauer, Rebecca and Schlager, Sabine and Mertens, Daniel and Gasche, Nikolaus and Sladek, Barbara and Herbold, Craig and Bochkareva, Olga and Emelianenko, Vera and Vogelsang, Harald and Lang, Michaela and Klotz, Anton and Moik, Birgit and Makristathis, Athanasios and Berry, David and Dabsch, Stefanie and Khare, Vineeta and Gasche, Christoph},
  issn      = {1949-0984},
  journal   = {Gut Microbes},
  keywords  = {Infectious Diseases, Microbiology (medical), Gastroenterology, Microbiology},
  number    = {1},
  publisher = {Taylor \& Francis},
  title     = {{Atypical enteropathogenic E. coli are associated with disease activity in ulcerative colitis}},
  doi       = {10.1080/19490976.2022.2143218},
  volume    = {14},
  year      = {2022},
}

@article{12116,
  abstract  = {Russia’s unprovoked attack on Ukraine has destroyed civilian infrastructure, including universities, research centers, and other academic infrastructure (1). Many Ukrainian scholars and researchers remain in Ukraine, and their work has suffered from major setbacks (2–4). We call on international scientists and institutions to support them.},
  author    = {Chhugani, Karishma and Frolova, Alina and Salyha, Yuriy and Fiscutean, Andrada and Zlenko, Oksana and Reinsone, Sanita and Wolfsberger, Walter W. and Ivashchenko, Oleksandra V. and Maci, Megi and Dziuba, Dmytro and Parkhomenko, Andrii and Bortz, Eric and Kondrashov, Fyodor and Łabaj, Paweł P. and Romero, Veronika and Hlávka, Jakub and Oleksyk, Taras K. and Mangul, Serghei},
  issn      = {1095-9203},
  journal   = {Science},
  number    = {6626},
  pages     = {1285--1286},
  publisher = {American Association for the Advancement of Science},
  title     = {{Remote opportunities for scholars in Ukraine}},
  doi       = {10.1126/science.adg0797},
  volume    = {378},
  year      = {2022},
}

@article{9255,
  abstract  = {Our ability to trust that a random number is truly random is essential for fields as diverse as cryptography and fundamental tests of quantum mechanics. Existing solutions both come with drawbacks—device-independent quantum random number generators (QRNGs) are highly impractical and standard semi-device-independent QRNGs are limited to a specific physical implementation and level of trust. Here we propose a framework for semi-device-independent randomness certification, using a source of trusted vacuum in the form of a signal shutter. It employs a flexible set of assumptions and levels of trust, allowing it to be applied in a wide range of physical scenarios involving both quantum and classical entropy sources. We experimentally demonstrate our protocol with a photonic setup and generate secure random bits under three different assumptions with varying degrees of security and resulting data rates.},
  author    = {Pivoluska, Matej and Plesch, Martin and Farkas, Máté and Ruzickova, Natalia and Flegel, Clara and Valencia, Natalia Herrera and Mccutcheon, Will and Malik, Mehul and Aguilar, Edgar A.},
  issn      = {2056-6387},
  journal   = {npj Quantum Information},
  publisher = {Springer Nature},
  title     = {{Semi-device-independent random number generation with flexible assumptions}},
  doi       = {10.1038/s41534-021-00387-1},
  volume    = {7},
  year      = {2021},
}

@article{9380,
  abstract  = {Shigella are pathogens originating within the Escherichia lineage but frequently classified as a separate genus. Shigella genomes contain numerous insertion sequences (ISs) that lead to pseudogenisation of affected genes and an increase of non-homologous recombination. Here, we study 414 genomes of E. coli and Shigella strains to assess the contribution of genomic rearrangements to Shigella evolution. We found that Shigella experienced exceptionally high rates of intragenomic rearrangements and had a decreased rate of homologous recombination compared to pathogenic and non-pathogenic E. coli. The high rearrangement rate resulted in independent disruption of syntenic regions and parallel rearrangements in different Shigella lineages. Specifically, we identified two types of chromosomally encoded E3 ubiquitin-protein ligases acquired independently by all Shigella strains that also showed a high level of sequence conservation in the promoter and further in the 5′-intergenic region. In the only available enteroinvasive E. coli (EIEC) strain, which is a pathogenic E. coli with a phenotype intermediate between Shigella and non-pathogenic E. coli, we found a rate of genome rearrangements comparable to those in other E. coli and no functional copies of the two Shigella-specific E3 ubiquitin ligases. These data indicate that the accumulation of ISs influenced many aspects of genome evolution and played an important role in the evolution of intracellular pathogens. Our research demonstrates the power of comparative genomics-based on synteny block composition and an important role of non-coding regions in the evolution of genomic islands.},
  author    = {Seferbekova, Zaira and Zabelkin, Alexey and Yakovleva, Yulia and Afasizhev, Robert and Dranenko, Natalia O. and Alexeev, Nikita and Gelfand, Mikhail S. and Bochkareva, Olga},
  issn      = {1664-302X},
  journal   = {Frontiers in Microbiology},
  publisher = {Frontiers},
  title     = {{High rates of genome rearrangements and pathogenicity of Shigella spp}},
  doi       = {10.3389/fmicb.2021.628622},
  volume    = {12},
  year      = {2021},
}

@article{9910,
  abstract  = {Adult height inspired the first biometrical and quantitative genetic studies and is a test-case trait for understanding heritability. The studies of height led to formulation of the classical polygenic model, that has a profound influence on the way we view and analyse complex traits. An essential part of the classical model is an assumption of additivity of effects and normality of the distribution of the residuals. However, it may be expected that the normal approximation will become insufficient in bigger studies. Here, we demonstrate that when the height of hundreds of thousands of individuals is analysed, the model complexity needs to be increased to include non-additive interactions between sex, environment and genes. Alternatively, the use of log-normal approximation allowed us to still use the additive effects model. These findings are important for future genetic and methodologic studies that make use of adult height as an exemplar trait.},
  author    = {Slavskii, Sergei A. and Kuznetsov, Ivan A. and Shashkova, Tatiana I. and Bazykin, Georgii A. and Axenovich, Tatiana I. and Kondrashov, Fyodor and Aulchenko, Yurii S.},
  issn      = {1476-5438},
  journal   = {European Journal of Human Genetics},
  number    = {7},
  pages     = {1082--1091},
  publisher = {Springer Nature},
  title     = {{The limits of normal approximation for adult height}},
  doi       = {10.1038/s41431-021-00836-7},
  volume    = {29},
  year      = {2021},
}

@article{9905,
  abstract  = {Vaccines are thought to be the best available solution for controlling the ongoing SARS-CoV-2 pandemic. However, the emergence of vaccine-resistant strains may come too rapidly for current vaccine developments to alleviate the health, economic and social consequences of the pandemic. To quantify and characterize the risk of such a scenario, we created a SIR-derived model with initial stochastic dynamics of the vaccine-resistant strain to study the probability of its emergence and establishment. Using parameters realistically resembling SARS-CoV-2 transmission, we model a wave-like pattern of the pandemic and consider the impact of the rate of vaccination and the strength of non-pharmaceutical intervention measures on the probability of emergence of a resistant strain. As expected, we found that a fast rate of vaccination decreases the probability of emergence of a resistant strain. Counterintuitively, when a relaxation of non-pharmaceutical interventions happened at a time when most individuals of the population have already been vaccinated the probability of emergence of a resistant strain was greatly increased. Consequently, we show that a period of transmission reduction close to the end of the vaccination campaign can substantially reduce the probability of resistant strain establishment. Our results suggest that policymakers and individuals should consider maintaining non-pharmaceutical interventions and transmission-reducing behaviours throughout the entire vaccination period.},
  author    = {Rella, Simon and Kulikova, Yuliya A. and Dermitzakis, Emmanouil T. and Kondrashov, Fyodor},
  issn      = {2045-2322},
  journal   = {Scientific Reports},
  number    = {1},
  publisher = {Springer Nature},
  title     = {{Rates of SARS-CoV-2 transmission and vaccination impact the fate of vaccine-resistant strains}},
  doi       = {10.1038/s41598-021-95025-3},
  volume    = {11},
  year      = {2021},
}

@article{7603,
  abstract  = {Plants are exposed to a variety of abiotic and biotic stresses that may result in DNA damage. Endogenous processes - such as DNA replication, DNA recombination, respiration, or photosynthesis - are also a threat to DNA integrity. It is therefore essential to understand the strategies plants have developed for DNA damage detection, signaling, and repair. Alternative splicing (AS) is a key post-transcriptional process with a role in regulation of gene expression. Recent studies demonstrate that the majority of intron-containing genes in plants are alternatively spliced, highlighting the importance of AS in plant development and stress response. Not only does AS ensure a versatile proteome and influence the abundance and availability of proteins greatly, it has also emerged as an important player in the DNA damage response (DDR) in animals. Despite extensive studies of DDR carried out in plants, its regulation at the level of AS has not been comprehensively addressed. Here, we provide some insights into the interplay between AS and DDR in plants.},
  author    = {Nimeth, Barbara Anna and Riegler, Stefan and Kalyna, Maria},
  issn      = {1664-462X},
  journal   = {Frontiers in Plant Science},
  publisher = {Frontiers},
  title     = {{Alternative splicing and DNA damage response in plants}},
  doi       = {10.3389/fpls.2020.00091},
  volume    = {11},
  year      = {2020},
}

@article{7622,
  abstract  = {The International Young Physicists' Tournament (IYPT) continued in 2018 in Beijing, China and 2019 in Warsaw, Poland with its 31st and 32nd editions. The IYPT is a modern scientific competition for teams of high school students, also known as the Physics World Cup. It involves long-term theoretical and experimental work focused on solving 17 publicly announced open-ended problems in teams of five. On top of that, teams have to present their solutions in front of other teams and a scientific jury, and get opposed and reviewed by their peers. Here we present a brief information about the competition with a specific focus on one of the IYPT 2018 tasks, the 'Ring Oiler'. This seemingly simple mechanical problem appeared to be of such a complexity that even the dozens of participating teams and jurying scientists were not able to solve all of its subtleties.},
  author    = {Plesch, Martin and Plesník, Samuel and Ruzickova, Natalia},
  issn      = {1361-6404},
  journal   = {European Journal of Physics},
  number    = {3},
  publisher = {IOP Publishing},
  title     = {{The IYPT and the 'Ring Oiler' problem}},
  doi       = {10.1088/1361-6404/ab6414},
  volume    = {41},
  year      = {2020},
}

@article{7931, abstract = {In the course of sample preparation for Next Generation Sequencing (NGS), DNA is fragmented by various methods. Fragmentation shows a persistent bias with regard to the cleavage rates of various dinucleotides. With the exception of CpG dinucleotides the previously described biases were consistent with results of the DNA cleavage in solution. Here we computed cleavage rates of all dinucleotides including the methylated CpG and unmethylated CpG dinucleotides using data of the Whole Genome Sequencing datasets of the 1000 Genomes project. We found that the cleavage rate of CpG is significantly higher for the methylated CpG dinucleotides. Using this information, we developed a classifier for distinguishing cancer and healthy tissues based on their CpG islands statuses of the fragmentation. A simple Support Vector Machine classifier based on this algorithm shows an accuracy of 84%. The proposed method allows the detection of epigenetic markers purely based on mechanochemical DNA fragmentation, which can be detected by a simple analysis of the NGS sequencing data.}, author = {Uroshlev, Leonid A. and Abdullaev, Eldar T. and Umarova, Iren R. and Il’Icheva, Irina A. and Panchenko, Larisa A. and Polozov, Robert V. and Kondrashov, Fyodor and Nechipurenko, Yury D. 
and Grokhovsky, Sergei L.}, issn = {20452322}, journal = {Scientific Reports}, publisher = {Springer Nature}, title = {{A method for identification of the methylation level of CpG islands from NGS data}}, doi = {10.1038/s41598-020-65406-1}, volume = {10}, year = {2020}, } @article{8320, abstract = {The genetic code is considered to use five nucleic bases (adenine, guanine, cytosine, thymine and uracil), which form two pairs for encoding information in DNA and two pairs for encoding information in RNA. Nevertheless, in recent years several artificial base pairs have been developed in attempts to expand the genetic code. Employment of these additional base pairs increases the information capacity and variety of DNA sequences, and provides a platform for the site-specific, enzymatic incorporation of extra functional components into DNA and RNA. As a result, of the development of such expanded systems, many artificial base pairs have been synthesized and tested under various conditions. Following many stages of enhancement, unnatural base pairs have been modified to eliminate their weak points, qualifying them for specific research needs. Moreover, the first attempts to create a semi-synthetic organism containing DNA with unnatural base pairs seem to have been successful. This further extends the possible applications of these kinds of pairs. Herein, we describe the most significant qualities of unnatural base pairs and their actual applications.}, author = {Mukba, S. A. and Vlasov, Petr and Kolosov, P. M. and Shuvalova, E. Y. and Egorova, T. V. and Alkalaeva, E. 
Z.}, issn = {16083245}, journal = {Molecular Biology}, number = {4}, pages = {475--484}, publisher = {Springer Nature}, title = {{Expanding the genetic code: Unnatural base pairs in biological systems}}, doi = {10.1134/S0026893320040111}, volume = {54}, year = {2020}, } @article{8321, abstract = {The genetic code is considered to use five nucleic bases (adenine, guanine, cytosine, thymine and uracil), which form two pairs for encoding information in DNA and two pairs for encoding information in RNA. Nevertheless, in recent years several artificial base pairs have been developed in attempts to expand the genetic code. Employment of these additional base pairs increases the information capacity and variety of DNA sequences, and provides a platform for the site-specific, enzymatic incorporation of extra functional components into DNA and RNA. As a result, of the development of such expanded systems, many artificial base pairs have been synthesized and tested under various conditions. Following many stages of enhancement, unnatural base pairs have been modified to eliminate their weak points, qualifying them for specific research needs. Moreover, the first attempts to create a semi-synthetic organism containing DNA with unnatural base pairs seem to have been successful. This further extends the possible applications of these kinds of pairs. Herein, we describe the most significant qualities of unnatural base pairs and their actual applications.}, author = {Mukba, S. A. and Vlasov, Petr and Kolosov, P. M. and Shuvalova, E. Y. and Egorova, T. V. and Alkalaeva, E. 
Z.}, issn = {00268984}, journal = {Molekuliarnaia biologiia}, number = {4}, pages = {531--541}, publisher = {Russian Academy of Sciences}, title = {{Expanding the genetic code: Unnatural base pairs in biological systems}}, doi = {10.31857/S0026898420040126}, volume = {54}, year = {2020}, } @article{8645, abstract = {Epistasis, the context-dependence of the contribution of an amino acid substitution to fitness, is common in evolution. To detect epistasis, fitness must be measured for at least four genotypes: the reference genotype, two different single mutants and a double mutant with both of the single mutations. For higher-order epistasis of the order n, fitness has to be measured for all $2^n$ genotypes of an n-dimensional hypercube in genotype space forming a ‘combinatorially complete dataset’. So far, only a handful of such datasets have been produced by manual curation. Concurrently, random mutagenesis experiments have produced measurements of fitness and other phenotypes in a high-throughput manner, potentially containing a number of combinatorially complete datasets. We present an effective recursive algorithm for finding all hypercube structures in random mutagenesis experimental data. To test the algorithm, we applied it to the data from a recent HIS3 protein dataset and found all 199 847 053 unique combinatorially complete genotype combinations of dimensionality ranging from 2 to 12. 
The algorithm may be useful for researchers looking for higher-order epistasis in their high-throughput experimental data.}, author = {Esteban, Laura A. and Lonishin, Lyubov R. and Bobrovskiy, Daniil M. and Leleytner, Gregory and Bogatyreva, Natalya S. and Kondrashov, Fyodor and Ivankov, Dmitry N.}, issn = {1460-2059}, journal = {Bioinformatics}, number = {6}, pages = {1960--1962}, publisher = {Oxford Academic}, title = {{HypercubeME: Two hundred million combinatorially complete datasets from a single experiment}}, doi = {10.1093/bioinformatics/btz841}, volume = {36}, year = {2020}, } @article{8707, abstract = {Dynamic changes in the three-dimensional (3D) organization of chromatin are associated with central biological processes, such as transcription, replication and development. Therefore, the comprehensive identification and quantification of these changes is fundamental to understanding of evolutionary and regulatory mechanisms. Here, we present Comparison of Hi-C Experiments using Structural Similarity (CHESS), an algorithm for the comparison of chromatin contact maps and automatic differential feature extraction. We demonstrate the robustness of CHESS to experimental variability and showcase its biological applications on (1) interspecies comparisons of syntenic regions in human and mouse models; (2) intraspecies identification of conformational changes in Zelda-depleted Drosophila embryos; (3) patient-specific aberrant chromatin conformation in a diffuse large B-cell lymphoma sample; and (4) the systematic identification of chromatin contact differences in high-resolution Capture-C data. 
In summary, CHESS is a computationally efficient method for the comparison and classification of changes in chromatin contact data.}, author = {Galan, Silvia and Machnik, Nick N. and Kruse, Kai and Díaz, Noelia and Marti-Renom, Marc A. and Vaquerizas, Juan M.}, issn = {15461718}, journal = {Nature Genetics}, pages = {1247--1255}, publisher = {Springer Nature}, title = {{CHESS enables quantitative comparison of chromatin contact data and automatic feature extraction}}, doi = {10.1038/s41588-020-00712-y}, volume = {52}, year = {2020}, } @article{8700, abstract = {Translation termination is a finishing step of protein biosynthesis. The significant role in this process belongs not only to protein factors of translation termination but also to the nearest nucleotide environment of stop codons. There are numerous descriptions of stop codons readthrough, which is due to specific nucleotide sequences behind them. However, represented data are segmental and don’t explain the mechanism of the nucleotide context influence on translation termination. It is well known that stop codon UAA usage is preferential for A/T-rich genes, and UAG, UGA—for G/C-rich genes, which is related to an expression level of these genes. We investigated the connection between a frequency of nucleotides occurrence in 3' area of stop codons in the human genome and their influence on translation termination efficiency. We found that 3' context motif, which is cognate to the sequence of a stop codon, stimulates translation termination. At the same time, the nucleotide composition of 3' sequence that differs from stop codon, decreases translation termination efficiency.}, author = {Sokolova, E. E. and Vlasov, Petr and Egorova, T. V. and Shuvalov, A. V. and Alkalaeva, E. 
Z.}, issn = {16083245}, journal = {Molecular Biology}, number = {5}, pages = {739--748}, publisher = {Springer Nature}, title = {{The influence of A/G composition of 3' stop codon contexts on translation termination efficiency in eukaryotes}}, doi = {10.1134/S0026893320050088}, volume = {54}, year = {2020}, } @article{8701, abstract = {Translation termination is a finishing step of protein biosynthesis. The significant role in this process belongs not only to protein factors of translation termination but also to the nearest nucleotide environment of stop codons. There are numerous descriptions of stop codons readthrough, which is due to specific nucleotide sequences behind them. However, represented data are segmental and don’t explain the mechanism of the nucleotide context influence on translation termination. It is well known that stop codon UAA usage is preferential for A/T-rich genes, and UAG, UGA—for G/C-rich genes, which is related to an expression level of these genes. We investigated the connection between a frequency of nucleotides occurrence in 3' area of stop codons in the human genome and their influence on translation termination efficiency. We found that 3' context motif, which is cognate to the sequence of a stop codon, stimulates translation termination. At the same time, the nucleotide composition of 3' sequence that differs from stop codon, decreases translation termination efficiency.}, author = {Sokolova, E. E. and Vlasov, Petr and Egorova, T. V. and Shuvalov, A. V. and Alkalaeva, E. 
Z.}, issn = {00268984}, journal = {Molekuliarnaia biologiia}, number = {5}, pages = {837--848}, publisher = {Russian Academy of Sciences}, title = {{The influence of A/G composition of 3' stop codon contexts on translation termination efficiency in eukaryotes}}, doi = {10.31857/S0026898420050080}, volume = {54}, year = {2020}, } @inproceedings{15071, abstract = {A mesophilic methanogenic culture, designated JL01, was isolated from Holocene permafrost in the Russian Arctic [1]. After long-term extensive cultivation at 15°C it turned out to be a tied binary culture of archaeal (JL01) and bacterial (Sphaerochaeta associata GLS2) strains. Strain JL01 was a strict anaerobe and grew on methanol, acetate and methylamines as energy and carbon sources. Cells were irregular coccoid, non-motile, non-spore-forming, and Gram-stain-positive. Optimum conditions for growth were 24-28 °C, pH 6.8–7.3 and 0.075-0.1 M NaCl. Phylogenetic tree reconstructions based on 16S rRNA and concatenated alignment of broadly conserved protein-coding genes revealed its close relation to Methanosarcina mazei S-6 T (similarity 99.5%). The comparison of whole genomic sequences (ANI) of the isolate and the type strain of M. mazei was 98.5%, which is higher than the values recommended for new species. Thus strain JL01 (=VKM B-2370=JCM 31898) represents the first M. mazei isolated from permanently subzero Arctic sediments. The long-term co-cultivation of JL01 with S. associata GLS2T showed the methane production without any additional carbon and energy sources. Genome analysis of S. 
associata GLS2T revealed putative genes involved in methanochondroitin catabolism.}, author = {Oshurkova, Viktoriia and Troshina, Olga and Trubitsyn, Vladimir and Ryzhmanova, Yana and Bochkareva, Olga and Shcherbakova, Viktoria}, booktitle = {Proceedings of 1st International Electronic Conference on Microbiology}, location = {Virtual}, publisher = {MDPI}, title = {{Characterization of Methanosarcina mazei JL01 isolated from Holocene Arctic permafrost and study of the archaeon cooperation with bacterium Sphaerochaeta associata GLS2T}}, doi = {10.3390/ecm2020-07116}, year = {2020}, } @article{7889, abstract = {Autoluminescent plants engineered to express a bacterial bioluminescence gene cluster in plastids have not been widely adopted because of low light output. We engineered tobacco plants with a fungal bioluminescence system that converts caffeic acid (present in all plants) into luciferin and report self-sustained luminescence that is visible to the naked eye. Our findings could underpin development of a suite of imaging tools for plants.}, author = {Mitiouchkina, Tatiana and Mishin, Alexander S. and Gonzalez Somermeyer, Louisa and Markina, Nadezhda M. and Chepurnyh, Tatiana V. and Guglya, Elena B. and Karataeva, Tatiana A. and Palkina, Kseniia A. and Shakhova, Ekaterina S. and Fakhranurova, Liliia I. and Chekova, Sofia V. and Tsarkova, Aleksandra S. and Golubev, Yaroslav V. and Negrebetsky, Vadim V. and Dolgushin, Sergey A. and Shalaev, Pavel V. and Shlykov, Dmitry and Melnik, Olesya A. and Shipunova, Victoria O. and Deyev, Sergey M. and Bubyrev, Andrey I. and Pushin, Alexander S. and Choob, Vladimir V. and Dolgov, Sergey V. and Kondrashov, Fyodor and Yampolsky, Ilia V. 
and Sarkisyan, Karen S.}, issn = {1546-1696}, journal = {Nature Biotechnology}, pages = {944--946}, publisher = {Springer Nature}, title = {{Plants with genetically encoded autoluminescence}}, doi = {10.1038/s41587-020-0500-9}, volume = {38}, year = {2020}, } @article{6419, abstract = {Characterizing the fitness landscape, a representation of fitness for a large set of genotypes, is key to understanding how genetic information is interpreted to create functional organisms. Here we determined the evolutionarily-relevant segment of the fitness landscape of His3, a gene coding for an enzyme in the histidine synthesis pathway, focusing on combinations of amino acid states found at orthologous sites of extant species. Just 15% of amino acids found in yeast His3 orthologues were always neutral while the impact on fitness of the remaining 85% depended on the genetic background. Furthermore, at 67% of sites, amino acid replacements were under sign epistasis, having both strongly positive and negative effect in different genetic backgrounds. 46% of sites were under reciprocal sign epistasis. The fitness impact of amino acid replacements was influenced by only a few genetic backgrounds but involved interaction of multiple sites, shaping a rugged fitness landscape in which many of the shortest paths between highly fit genotypes are inaccessible.}, author = {Pokusaeva, Victoria and Usmanova, Dinara R. and Putintseva, Ekaterina V. and Espinar, Lorena and Sarkisyan, Karen and Mishin, Alexander S. and Bogatyreva, Natalya S. and Ivankov, Dmitry and Akopyan, Arseniy and Avvakumov, Sergey and Povolotskaya, Inna S. and Filion, Guillaume J. and Carey, Lucas B. 
and Kondrashov, Fyodor}, issn = {15537404}, journal = {PLoS Genetics}, number = {4}, publisher = {Public Library of Science}, title = {{An experimental assay of the interactions of amino acids from orthologous sequences shaping a complex fitness landscape}}, doi = {10.1371/journal.pgen.1008079}, volume = {15}, year = {2019}, } @misc{9790, author = {Pokusaeva, Victoria and Usmanova, Dinara R. and Putintseva, Ekaterina V. and Espinar, Lorena and Sarkisyan, Karen and Mishin, Alexander S. and Bogatyreva, Natalya S. and Ivankov, Dmitry and Akopyan, Arseniy and Avvakumov, Sergey and Povolotskaya, Inna S. and Filion, Guillaume J. and Carey, Lucas B. and Kondrashov, Fyodor}, publisher = {Public Library of Science}, title = {{A statistical summary of segment libraries and sequencing results}}, doi = {10.1371/journal.pgen.1008079.s011}, year = {2019}, } @misc{9797, author = {Pokusaeva, Victoria and Usmanova, Dinara R. and Putintseva, Ekaterina V. and Espinar, Lorena and Sarkisyan, Karen and Mishin, Alexander S. and Bogatyreva, Natalya S. and Ivankov, Dmitry and Akopyan, Arseniy and Povolotskaya, Inna S. and Filion, Guillaume J. and Carey, Lucas B. and Kondrashov, Fyodor}, publisher = {Public Library of Science}, title = {{A statistical summary of segment libraries and sequencing results}}, doi = {10.1371/journal.pgen.1008079.s011}, year = {2019}, } @misc{9789, author = {Pokusaeva, Victoria and Usmanova, Dinara R. and Putintseva, Ekaterina V. and Espinar, Lorena and Sarkisyan, Karen and Mishin, Alexander S. and Bogatyreva, Natalya S. and Ivankov, Dmitry and Akopyan, Arseniy and Avvakumov, Sergey and Povolotskaya, Inna S. and Filion, Guillaume J. and Carey, Lucas B. and Kondrashov, Fyodor}, publisher = {Public Library of Science}, title = {{Multiple alignment of His3 orthologues}}, doi = {10.1371/journal.pgen.1008079.s010}, year = {2019}, } @article{6506, abstract = {How does environmental complexity affect the evolution of single genes? 
Here, we measured the effects of a set of Bacillus subtilis glutamate dehydrogenase mutants across 19 different environments—from phenotypically homogeneous single-cell populations in liquid media to heterogeneous biofilms, plant roots and soil populations. The effects of individual gene mutations on organismal fitness were highly reproducible in liquid cultures. However, 84% of the tested alleles showed opposing fitness effects under different growth conditions (sign environmental pleiotropy). In colony biofilms and soil samples, different alleles dominated in parallel replica experiments. Accordingly, we found that in these heterogeneous cell populations the fate of mutations was dictated by a combination of selection and drift. The latter relates to programmed prophage excisions that occurred during biofilm development. Overall, for each condition, a wide range of glutamate dehydrogenase mutations persisted and sometimes fixated as a result of the combined action of selection, pleiotropy and chance. However, over longer periods and in multiple environments, nearly all of this diversity would be lost—across all the environments and conditions that we tested, the wild type was the fittest allele.}, author = {Noda-García, Lianet and Davidi, Dan and Korenblum, Elisa and Elazar, Assaf and Putintseva, Ekaterina and Aharoni, Asaph and Tawfik, Dan S.}, issn = {2058-5276}, journal = {Nature Microbiology}, number = {7}, pages = {1221--1230}, publisher = {Springer Nature}, title = {{Chance and pleiotropy dominate genetic diversity in complex bacterial environments}}, doi = {10.1038/s41564-019-0412-y}, volume = {4}, year = {2019}, } @misc{9731, abstract = {OGs with putative pseudogenes by the number of affected genomes in different chlamydial species. Frameshift and nonsense mutations located less than 60 bp upstream of the gene end or present in a single genome from the corresponding OG were excluded. 
(CSV 31 kb)}, author = {Sigalova, Olga and Chaplin, Andrei and Bochkareva, Olga and Shelyakin, Pavel and Filaretov, Vsevolod and Akkuratov, Evgeny and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 11 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808772.v1}, year = {2019}, } @misc{9783, abstract = {Predicted frameshift and nonsense mutations in Chlamydial pan-genome. For the analysis of putative pseudogenes, events located less than 60 bp. away from gene end or present in a single genome from the corresponding OG were excluded. (CSV 600 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 10 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808760.v1}, year = {2019}, } @misc{9897, abstract = {Frameshift and nonsense mutations near homopolymeric tracts of OG1 genes. Only 374 genes with typical length and domain composition were considered. (CSV 6 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 20 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808850.v1}, year = {2019}, } @misc{9890, abstract = {Distribution of OGs with mosaic phyletic patterns across species (complete genomes only). (CSV 7 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. 
and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 15 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808802.v1}, year = {2019}, } @misc{9892, abstract = {Distribution of OGs with mosaic phyletic patterns across species (all genomes). (CSV 10 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 16 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808814.v1}, year = {2019}, } @misc{9893, abstract = {Summary of peripheral genes' phyletic patterns and tree concordance. (CSV 26 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 17 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808820.v1}, year = {2019}, } @misc{9894, abstract = {Orthologous families (OFs) derived by MCL clustering of OGs. (CSV 189 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. 
and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 18 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808826.v1}, year = {2019}, } @misc{9895, abstract = {Additional information on proteins from OG1. (CSV 30 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 19 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808835.v1}, year = {2019}, } @misc{9896, abstract = {Summary of the analysed genomes. (CSV 24 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 1 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808841.v1}, year = {2019}, } @article{6898, abstract = {Background Chlamydia are ancient intracellular pathogens with reduced, though strikingly conserved genome. Despite their parasitic lifestyle and isolated intracellular environment, these bacteria managed to avoid accumulation of deleterious mutations leading to subsequent genome degradation characteristic for many parasitic bacteria. Results We report pan-genomic analysis of sixteen species from genus Chlamydia including identification and functional annotation of orthologous genes, and characterization of gene gains, losses, and rearrangements. 
We demonstrate the overall genome stability of these bacteria as indicated by a large fraction of common genes with conserved genomic locations. On the other hand, extreme evolvability is confined to several paralogous gene families such as polymorphic membrane proteins and phospholipase D, and likely is caused by the pressure from the host immune system. Conclusions This combination of a large, conserved core genome and a small, evolvable periphery likely reflect the balance between the selective pressure towards genome reduction and the need to adapt to escape from the host immunity.}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, issn = {14712164}, journal = {BMC Genomics}, number = {1}, publisher = {BioMed Central}, title = {{Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.1186/s12864-019-6059-5}, volume = {20}, year = {2019}, } @misc{9898, abstract = {All polyN tracts of length 5 or more nucleotides in sequences of genes from OG1. Sequences were extracted and scanned prior to automatic correction for frameshifts implemented in the RAST pipeline. (CSV 133 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 21 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808859.v1}, year = {2019}, } @misc{9901, abstract = {Clusters of Orthologous Genes (COGs) and corresponding functional categories assigned to OGs. (CSV 117 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. 
and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 9 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808907.v1}, year = {2019}, } @misc{9899, abstract = {Summary of orthologous groups (OGs) for 227 genomes of genus Chlamydia. (CSV 362 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 2 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808865.v1}, year = {2019}, } @misc{9900, abstract = {Pan-genome statistics by species. (CSV 3 kb)}, author = {Sigalova, Olga M. and Chaplin, Andrei V. and Bochkareva, Olga and Shelyakin, Pavel V. and Filaretov, Vsevolod A. and Akkuratov, Evgeny E. and Burskaia, Valentina and Gelfand, Mikhail S.}, publisher = {Springer Nature}, title = {{Additional file 5 of Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction}}, doi = {10.6084/m9.figshare.9808886.v1}, year = {2019}, } @article{7181, abstract = {Multiple sequence alignments (MSAs) are used for structural and evolutionary predictions, but the complexity of aligning large datasets requires the use of approximate solutions, including the progressive algorithm. Progressive MSA methods start by aligning the most similar sequences and subsequently incorporate the remaining sequences, from leaf-to-root, based on a guide-tree. Their accuracy declines substantially as the number of sequences is scaled up. 
We introduce a regressive algorithm that enables MSA of up to 1.4 million sequences on a standard workstation and substantially improves accuracy on datasets larger than 10,000 sequences. Our regressive algorithm works the other way around to the progressive algorithm and begins by aligning the most dissimilar sequences. It uses an efficient divide-and-conquer strategy to run third-party alignment methods in linear time, regardless of their original complexity. Our approach will enable analyses of extremely large genomic datasets such as the recently announced Earth BioGenome Project, which comprises 1.5 million eukaryotic genomes.}, author = {Garriga, Edgar and Di Tommaso, Paolo and Magis, Cedrik and Erb, Ionas and Mansouri, Leila and Baltzis, Athanasios and Laayouni, Hafid and Kondrashov, Fyodor and Floden, Evan and Notredame, Cedric}, issn = {15461696}, journal = {Nature Biotechnology}, number = {12}, pages = {1466--1470}, publisher = {Springer Nature}, title = {{Large multiple sequence alignments with a root-to-leaf regressive method}}, doi = {10.1038/s41587-019-0333-6}, volume = {37}, year = {2019}, } @misc{13059, abstract = {This dataset contains a GitHub repository containing all the data, analysis, Nextflow workflows and Jupyter notebooks to replicate the manuscript titled "Fast and accurate large multiple sequence alignments with a root-to-leaf regressive method". It also contains the Multiple Sequence Alignments (MSAs) generated as well as the main figures and tables from the manuscript. The repository is also available at GitHub (https://github.com/cbcrg/dpa-analysis) release `v1.2`. 
For details on how to use the regressive alignment algorithm, see the T-Coffee software suite (https://github.com/cbcrg/tcoffee).}, author = {Garriga, Edgar and di Tommaso, Paolo and Magis, Cedrik and Erb, Ionas and Mansouri, Leila and Baltzis, Athanasios and Laayouni, Hafid and Kondrashov, Fyodor and Floden, Evan and Notredame, Cedric}, publisher = {Zenodo}, title = {{Fast and accurate large multiple sequence alignments with a root-to-leaf regressive method}}, doi = {10.5281/ZENODO.2025846}, year = {2018}, } @article{384, abstract = {Can orthologous proteins differ in terms of their ability to be secreted? To answer this question, we investigated the distribution of signal peptides within the orthologous groups of Enterobacterales. Parsimony analysis and sequence comparisons revealed a large number of signal peptide gain and loss events, in which signal peptides emerge or disappear in the course of evolution. Signal peptide losses prevail over gains, an effect which is especially pronounced in the transition from the free-living or commensal to the endosymbiotic lifestyle. The disproportionate decline in the number of signal peptide-containing proteins in endosymbionts cannot be explained by the overall reduction of their genomes. Signal peptides can be gained and lost either by acquisition/elimination of the corresponding N-terminal regions or by gradual accumulation of mutations. 
The evolutionary dynamics of signal peptides in bacterial proteins represents a powerful mechanism of functional diversification.}, author = {Hönigschmid, Peter and Bykova, Nadya and Schneider, René and Ivankov, Dmitry and Frishman, Dmitrij}, journal = {Genome Biology and Evolution}, number = {3}, pages = {928 -- 938}, publisher = {Oxford University Press}, title = {{Evolutionary interplay between symbiotic relationships and patterns of signal peptide gain and loss}}, doi = {10.1093/gbe/evy049}, volume = {10}, year = {2018}, } @article{5780, abstract = {Bioluminescence is found across the entire tree of life, conferring a spectacular set of visually oriented functions from attracting mates to scaring off predators. Half a dozen different luciferins, molecules that emit light when enzymatically oxidized, are known. However, just one biochemical pathway for luciferin biosynthesis has been described in full, which is found only in bacteria. Here, we report identification of the fungal luciferase and three other key enzymes that together form the biosynthetic cycle of the fungal luciferin from caffeic acid, a simple and widespread metabolite. Introduction of the identified genes into the genome of the yeast Pichia pastoris along with caffeic acid biosynthesis genes resulted in a strain that is autoluminescent in standard media. We analyzed evolution of the enzymes of the luciferin biosynthesis cycle and found that fungal bioluminescence emerged through a series of events that included two independent gene duplications. The retention of the duplicated enzymes of the luciferin pathway in nonluminescent fungi shows that the gene duplication was followed by functional sequence divergence of enzymes of at least one gene in the biosynthetic pathway and suggests that the evolution of fungal bioluminescence proceeded through several closely related stepping stone nonluminescent biochemical reactions with adaptive roles. 
The availability of a complete eukaryotic luciferin biosynthesis pathway provides several applications in biomedicine and bioengineering.}, author = {Kotlobay, Alexey A. and Sarkisyan, Karen and Mokrushina, Yuliana A. and Marcet-Houben, Marina and Serebrovskaya, Ekaterina O. and Markina, Nadezhda M. and Gonzalez Somermeyer, Louisa and Gorokhovatsky, Andrey Y. and Vvedensky, Andrey and Purtov, Konstantin V. and Petushkov, Valentin N. and Rodionova, Natalja S. and Chepurnyh, Tatiana V. and Fakhranurova, Liliia and Guglya, Elena B. and Ziganshin, Rustam and Tsarkova, Aleksandra S. and Kaskova, Zinaida M. and Shender, Victoria and Abakumov, Maxim and Abakumova, Tatiana O. and Povolotskaya, Inna S. and Eroshkin, Fedor M. and Zaraisky, Andrey G. and Mishin, Alexander S. and Dolgov, Sergey V. and Mitiouchkina, Tatiana Y. and Kopantzev, Eugene P. and Waldenmaier, Hans E. and Oliveira, Anderson G. and Oba, Yuichi and Barsova, Ekaterina and Bogdanova, Ekaterina A. and Gabaldón, Toni and Stevani, Cassius V. and Lukyanov, Sergey and Smirnov, Ivan V. and Gitelson, Josef I. and Kondrashov, Fyodor and Yampolsky, Ilia V.}, issn = {00278424}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, number = {50}, pages = {12728--12732}, publisher = {National Academy of Sciences}, title = {{Genetically encodable bioluminescent system from fungi}}, doi = {10.1073/pnas.1803615115}, volume = {115}, year = {2018}, } @article{279, abstract = {Background: Natural selection shapes cancer genomes. Previous studies used signatures of positive selection to identify genes driving malignant transformation. However, the contribution of negative selection against somatic mutations that affect essential tumor functions or specific domains remains a controversial topic. Results: Here, we analyze 7546 individual exomes from 26 tumor types from TCGA data to explore the portion of the cancer exome under negative selection. 
Although we find most of the genes neutrally evolving in a pan-cancer framework, we identify essential cancer genes and immune-exposed protein regions under significant negative selection. Moreover, our simulations suggest that the amount of negative selection is underestimated. We therefore choose an empirical approach to identify genes, functions, and protein regions under negative selection. We find that expression and mutation status of negatively selected genes is indicative of patient survival. Processes that are most strongly conserved are those that play fundamental cellular roles such as protein synthesis, glucose metabolism, and molecular transport. Intriguingly, we observe strong signals of selection in the immunopeptidome and proteins controlling peptide exposition, highlighting the importance of immune surveillance evasion. Additionally, tumor type-specific immune activity correlates with the strength of negative selection on human epitopes. Conclusions: In summary, our results show that negative selection is a hallmark of cell essentiality and immune response in cancer. The functional domains identified could be exploited therapeutically, ultimately allowing for the development of novel cancer treatments.}, author = {Zapata, Luis and Pich, Oriol and Serrano, Luis and Kondrashov, Fyodor and Ossowski, Stephan and Schaefer, Martin}, journal = {Genome Biology}, publisher = {BioMed Central}, title = {{Negative selection in tumor genome evolution acts on essential cellular functions and the immunopeptidome}}, doi = {10.1186/s13059-018-1434-0}, volume = {19}, year = {2018}, } @misc{9812, abstract = {This document contains the full list of genes with their respective significance and dN/dS values. 
(TXT 4499Â kb)}, author = {Zapata, Luis and Pich, Oriol and Serrano, Luis and Kondrashov, Fyodor and Ossowski, Stephan and Schaefer, Martin}, publisher = {Springer Nature}, title = {{Additional file 2: Of negative selection in tumor genome evolution acts on essential cellular functions and the immunopeptidome}}, doi = {10.6084/m9.figshare.6401414.v1}, year = {2018}, } @misc{9811, abstract = {This document contains additional supporting evidence presented as supplemental tables. (XLSX 50Â kb)}, author = {Zapata, Luis and Pich, Oriol and Serrano, Luis and Kondrashov, Fyodor and Ossowski, Stephan and Schaefer, Martin}, publisher = {Springer Nature}, title = {{Additional file 1: Of negative selection in tumor genome evolution acts on essential cellular functions and the immunopeptidome}}, doi = {10.6084/m9.figshare.6401390.v1}, year = {2018}, } @article{5995, abstract = {Motivation Computational prediction of the effect of mutations on protein stability is used by researchers in many fields. The utility of the prediction methods is affected by their accuracy and bias. Bias, a systematic shift of the predicted change of stability, has been noted as an issue for several methods, but has not been investigated systematically. Presence of the bias may lead to misleading results especially when exploring the effects of combination of different mutations. Results Here we use a protocol to measure the bias as a function of the number of introduced mutations. It is based on a self-consistency test of the reciprocity the effect of a mutation. An advantage of the used approach is that it relies solely on crystal structures without experimentally measured stability values. We applied the protocol to four popular algorithms predicting change of protein stability upon mutation, FoldX, Eris, Rosetta and I-Mutant, and found an inherent bias. For one program, FoldX, we manage to substantially reduce the bias using additional relaxation by Modeller. 
Authors using algorithms for predicting effects of mutations should be aware of the bias described here.}, author = {Usmanova, Dinara R and Bogatyreva, Natalya S and Ariño Bernad, Joan and Eremina, Aleksandra A and Gorshkova, Anastasiya A and Kanevskiy, German M and Lonishin, Lyubov R and Meister, Alexander V and Yakupova, Alisa G and Kondrashov, Fyodor and Ivankov, Dmitry}, issn = {1367-4803}, journal = {Bioinformatics}, number = {21}, pages = {3653--3658}, publisher = {Oxford University Press }, title = {{Self-consistency test reveals systematic bias in programs for prediction change of stability upon mutation}}, doi = {10.1093/bioinformatics/bty340}, volume = {34}, year = {2018}, } @article{3771, abstract = {The small-sized frugivorous bat Carollia perspicillata is an understory specialist and occurs in a wide range of lowland habitats, tending to be more common in tropical dry or moist forests of South and Central America. Its sister species, Carollia brevicauda, occurs almost exclusively in the Amazon rainforest. A recent phylogeographic study proposed a hypothesis of origin and subsequent diversification for C. perspicillata along the Atlantic coastal forest of Brazil. Additionally, it also found two allopatric clades for C. brevicauda separated by the Amazon Basin. We used cytochrome b gene sequences and a more extensive sampling to test hypotheses related to the origin and diversification of C. perspicillata plus C. brevicauda clade in South America. The results obtained indicate that there are two sympatric evolutionary lineages within each species. In C. perspicillata, one lineage is limited to the Southern Atlantic Forest, whereas the other is widely distributed. Coalescent analysis points to a simultaneous origin for C. perspicillata and C. brevicauda, although no place for the diversification of each species can be firmly suggested. The phylogeographic pattern shown by C. 
perspicillata is also congruent with the Pleistocene refugia hypothesis as a likely vicariant phenomenon shaping the present distribution of its intraspecific lineages.}, author = {Pavan, Ana and Martins, Felipe and Santos, Fabrício and Ditchfield, Albert and Fernandes Redondo, Rodrigo A}, journal = {Biological Journal of the Linnean Society}, number = {3}, pages = {527 -- 539}, publisher = {Wiley-Blackwell}, title = {{Patterns of diversification in two species of short-tailed bats (Carollia Gray, 1838): the effects of historical fragmentation of Brazilian rainforests.}}, doi = {10.1111/j.1095-8312.2010.01601.x}, volume = {102}, year = {2011}, }