@phdthesis{20811,
  abstract     = {This thesis is organized into two parts, each comprising two chapters: Chapter 1 and 2 offer models for the evolution of vaccine resistance in response to diverse vaccination strategies. Chapter 3 and 4 review the statistics of records, their connection to models of innovation and an application to the cultural evolution of sports.
In chapter 1 we present a modelling study from 2021 on the evolution of SARS-CoV-2. At that time the vaccine-resistant Omicron variant had not yet evolved. In our model we consider a population that is becoming vaccinated over time, while a pathogen is spreading in the population and eventually becoming resistant to the vaccine. We explore effective pharmaceutical and non-pharmaceutical interventions to prevent the emergence of vaccine resistance.
In chapter 2 we model a particular set of complex vaccination strategies, mosaic and pyramid vaccination, where an immunologically diverse portfolio of vaccines is considered. We find that a bet-hedging strategy, in which vaccine types are distributed in the population, is effective at hindering the evolution of vaccine resistance if mutation rates are high.
In chapter 3 we switch gears and present a review on the statistics of records. We highlight similarities and analogies to other models in the fields of statistical physics, evolution and innovation. This offers interesting complementary perspectives on well-known models.
In chapter 4 we apply models of record statistics and innovation to study cultural evolution in sport. We propose a model of sport evolution that combines deterministic improvements in performance and stochastic bursts of improvements due to innovation.},
  author       = {Rella, Simon},
  issn         = {2663-337X},
  pages        = {95},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Adaptive processes in biology and culture : Models of evolving vaccine resistance and the record statistics of innovation}},
  doi          = {10.15479/AT-ISTA-20811},
  year         = {2025},
}

@article{19626,
  author    = {Jain, Kirti and Hauschild, Robert and Bochkareva, Olga and Römhild, Roderich and Tkačik, Gašper and Guet, Calin C},
  title     = {{Pulsatile basal gene expression as a fitness determinant in bacteria}},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {122},
  number    = {15},
  year      = {2025},
  publisher = {National Academy of Sciences},
  issn      = {1091-6490},
  doi       = {10.1073/pnas.2413709122},
  abstract  = {Active regulation of gene expression, orchestrated by complex interactions of activators and repressors at promoters, controls the fate of organisms. In contrast, basal expression at uninduced promoters is considered to be a dynamically inert mode of nonfunctional “promoter leakiness,” merely a byproduct of transcriptional regulation. Here, we investigate the basal expression mode of the mar operon, the main regulator of intrinsic multiple antibiotic resistance in Escherichia coli, and link its dynamic properties to the noncanonical, yet highly conserved start codon of marR across Enterobacteriaceae. Real-time, single-cell measurements across tens of generations reveal that basal expression consists of rare stochastic gene expression pulses, which maximize variability in wildtype and, surprisingly, transiently accelerate cellular elongation rates. Competition experiments show that basal expression confers fitness advantages to wildtype across several transitions between exponential and stationary growth by shortening lag times. The dynamically rich basal expression of the mar operon has likely been evolutionarily maintained for its role in growth homeostasis of Enterobacteria within the gut environment, thereby allowing other ancillary gene regulatory roles to evolve, e.g., control of costly-to-induce multidrug efflux pumps. Understanding the complex selection forces governing genetic systems involved in intrinsic multidrug resistance is crucial for effective public health measures.},
}

@misc{19294,
  abstract     = {Active regulation of gene expression, orchestrated by complex interactions of activators and repressors at promoters, controls the fate of organisms. In contrast, basal expression at uninduced promoters is considered to be a dynamically inert mode of non-functional “promoter leakiness”, merely a byproduct of transcriptional regulation. Here, we investigate the basal expression mode of the mar operon, the main regulator of intrinsic multiple antibiotic resistance in Escherichia coli, and link its dynamic properties to the non-canonical, yet highly conserved start codon of marR across Enterobacteriaceae. Real-time, single-cell measurements across tens of generations reveal that basal expression consists of rare stochastic gene expression pulses, which maximize variability in wildtype and, surprisingly, transiently accelerate cellular elongation rates. Competition experiments show that basal expression confers fitness advantages to wildtype across several transitions between exponential and stationary growth by shortening lag times. The dynamically rich basal expression of the mar operon has likely been evolutionarily maintained for its role in growth homeostasis of Enterobacteria within the gut environment, thereby allowing other ancillary gene regulatory roles to evolve, e.g. control of costly-to-induce multi-drug efflux pumps. Understanding the complex selection forces governing genetic systems involved in intrinsic multi-drug resistance is crucial for effective public health measures.},
  author       = {Jain, Kirti and Hauschild, Robert and Bochkareva, Olga and Römhild, Roderich and Tkačik, Gašper and Guet, Calin C},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Data for ``Pulsatile basal gene expression as a fitness determinant in bacteria''}},
  doi          = {10.15479/AT:ISTA:19294},
  year         = {2025},
}

@article{15179,
  author    = {Palkina, Kseniia A. and Karataeva, Tatiana A. and Perfilov, Maxim M. and Fakhranurova, Liliia I. and Markina, Nadezhda M. and Gonzalez Somermeyer, Louisa and Garcia-Perez, Elena and Vazquez-Vilar, Marta and Rodriguez-Rodriguez, Marta and Vazquez-Vilriales, Victor and Shakhova, Ekaterina S. and Mitiouchkina, Tatiana and Belozerova, Olga A. and Kovalchuk, Sergey I. and Alekberova, Anna and Malyshevskaia, Alena K. and Bugaeva, Evgenia N. and Guglya, Elena B. and Balakireva, Anastasia and Sytov, Nikita and Bezlikhotnova, Anastasia and Boldyreva, Daria I. and Babenko, Vladislav V. and Kondrashov, Fyodor and Choob, Vladimir V. and Orzaez, Diego and Yampolsky, Ilia V. and Mishin, Alexander S. and Sarkisyan, Karen S.},
  title     = {{A hybrid pathway for self-sustained luminescence}},
  journal   = {Science Advances},
  volume    = {10},
  number    = {10},
  year      = {2024},
  publisher = {American Association for the Advancement of Science},
  issn      = {2375-2548},
  doi       = {10.1126/sciadv.adk1992},
  abstract  = {The fungal bioluminescence pathway can be reconstituted in other organisms allowing luminescence imaging without exogenously supplied substrate. The pathway starts from hispidin biosynthesis—a step catalyzed by a large fungal polyketide synthase that requires a posttranslational modification for activity. Here, we report identification of alternative compact hispidin synthases encoded by a phylogenetically diverse group of plants. A hybrid bioluminescence pathway that combines plant and fungal genes is more compact, not dependent on availability of machinery for posttranslational modifications, and confers autonomous bioluminescence in yeast, mammalian, and plant hosts. The compact size of plant hispidin synthases enables additional modes of delivery of autoluminescence, such as delivery with viral vectors.},
}

@phdthesis{17850,
  abstract     = {Understanding the relationship between a given phenotype and its underlying genotype or genotypes is one of the most pressing challenges of biology, as it lies at the heart of not only basic understanding of evolutionary theory, but also of practical applications in medicine and bioengineering. Understanding this relationship is complicated by the ubiquitous phenomenon of epistasis, wherein mutation effects are dependent on their genetic context. Fitness landscapes — representations of phenotype as a function of genotype — are being increasingly used as a tool to study the effects and interactions of thousands of mutations, but are experimentally limited to exploring a small fraction of a protein’s theoretical sequence space. Furthermore, not all regions of said sequence space are necessarily equally informative. Thus, gene selection for landscape surveys should be carefully considered in order to maximize the usable output of necessarily limited data.

In this work, we analyzed the fitness landscapes of orthologous green fluorescent proteins from four different species, by systematically measuring the phenotype, fluorescence, of tens of thousands of mutant genotypes from each protein. These landscapes were highly heterogeneous, with some genes being mutationally robust and displaying epistasis only rarely, and others being highly epistatic and mutationally fragile. We used this data to train machine learning models to predict fluorescence from genotype. Although the training data contained almost exclusively genotypes with less than 3% sequence divergence from the original wild-type sequences, we were able to create novel, functional genotypes with up to 20% sequence divergence. Counterintuitively however, genes with high mutational robustness and rare epistasis were more difficult to introduce large numbers of mutations into, not less. This represents the first study of large-scale fitness landscapes of a protein family, and provides insights into how to approach future landscape surveys and their applications in novel protein design.},
  author       = {Gonzalez Somermeyer, Louisa},
  issn         = {2663-337X},
  pages        = {89},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Fitness landscapes of orthologous green fluorescent proteins}},
  doi          = {10.15479/at:ista:17850},
  year         = {2024},
}

@article{15362,
  author    = {Andrianova, Maria A. and Seplyarskiy, Vladimir B. and Terradas, Mariona and Sánchez-Heras, Ana Beatriz and Mur, Pilar and Soto, José Luis and Aiza, Gemma and Borràs, Emma and Kondrashov, Fyodor and Kondrashov, Alexey S. and Bazykin, Georgii A. and Valle, Laura},
  title     = {{Discovery of recessive effect of human polymerase δ proofreading deficiency through mutational analysis of POLD1-mutated normal and cancer cells}},
  journal   = {European Journal of Human Genetics},
  volume    = {32},
  pages     = {837--845},
  year      = {2024},
  publisher = {Springer Nature},
  issn      = {1476-5438},
  doi       = {10.1038/s41431-024-01598-8},
  abstract  = {Constitutional heterozygous pathogenic variants in the exonuclease domain of POLE and POLD1, which affect the proofreading activity of the corresponding polymerases, cause a cancer predisposition syndrome characterized by increased risk of gastrointestinal polyposis, colorectal cancer, endometrial cancer and other tumor types. The generally accepted explanation for the connection between the disruption of the proofreading activity of polymerases epsilon and delta and cancer development is through an increase in the somatic mutation rate. Here we studied an extended family with multiple members heterozygous for the pathogenic POLD1 variant c.1421T>C p.(Leu474Pro), which segregates with the polyposis and cancer phenotypes. Through the analysis of mutational patterns of patient-derived fibroblasts colonies and de novo mutations obtained by parent-offspring comparisons, we concluded that heterozygous POLD1 L474P just subtly increases the somatic and germline mutation burden. In contrast, tumors developed in individuals with a heterozygous mutation in the exonuclease domain of POLD1, including L474P, have an extremely high mutation rate (>100 mut/Mb) associated with signature SBS10d. We solved this contradiction through the observation that tumorigenesis involves somatic inactivation of the wildtype POLD1 allele. These results imply that exonuclease deficiency of polymerase delta has a recessive effect on mutation rate.},
}

@article{13976,
  author    = {Wolfsberger, Walter and Chhugani, Karishma and Shchubelka, Khrystyna and Frolova, Alina and Salyha, Yuriy and Zlenko, Oksana and Arych, Mykhailo and Dziuba, Dmytro and Parkhomenko, Andrii and Smolanka, Volodymyr and Gümüş, Zeynep H. and Sezgin, Efe and Diaz-Lameiro, Alondra and Toth, Viktor R. and Maci, Megi and Bortz, Eric and Kondrashov, Fyodor and Morton, Patricia M. and Łabaj, Paweł P. and Romero, Veronika and Hlávka, Jakub and Mangul, Serghei and Oleksyk, Taras K.},
  title     = {{Scientists without borders: Lessons from Ukraine}},
  journal   = {GigaScience},
  volume    = {12},
  year      = {2023},
  publisher = {Oxford University Press},
  issn      = {2047-217X},
  doi       = {10.1093/gigascience/giad045},
  abstract  = {Conflicts and natural disasters affect entire populations of the countries involved and, in addition to the thousands of lives destroyed, have a substantial negative impact on the scientific advances these countries provide. The unprovoked invasion of Ukraine by Russia, the devastating earthquake in Turkey and Syria, and the ongoing conflicts in the Middle East are just a few examples. Millions of people have been killed or displaced, their futures uncertain. These events have resulted in extensive infrastructure collapse, with loss of electricity, transportation, and access to services. Schools, universities, and research centers have been destroyed along with decades’ worth of data, samples, and findings. Scholars in disaster areas face short- and long-term problems in terms of what they can accomplish now for obtaining grants and for employment in the long run. In our interconnected world, conflicts and disasters are no longer a local problem but have wide-ranging impacts on the entire world, both now and in the future. Here, we focus on the current and ongoing impact of war on the scientific community within Ukraine and from this draw lessons that can be applied to all affected countries where scientists at risk are facing hardship. We present and classify examples of effective and feasible mechanisms used to support researchers in countries facing hardship and discuss how these can be implemented with help from the international scientific community and what more is desperately needed. Reaching out, providing accessible training opportunities, and developing collaborations should increase inclusion and connectivity, support scientific advancements within affected communities, and expedite postwar and disaster recovery.},
}

@article{14716,
  abstract     = {Background: Antimicrobial resistance (AMR) poses a significant global health threat, and an accurate prediction of bacterial resistance patterns is critical for effective treatment and control strategies. In recent years, machine learning (ML) approaches have emerged as powerful tools for analyzing large-scale bacterial AMR data. However, ML methods often ignore evolutionary relationships among bacterial strains, which can greatly impact performance of the ML methods, especially if resistance-associated features are attempted to be detected. Genome-wide association studies (GWAS) methods like linear mixed models accounts for the evolutionary relationships in bacteria, but they uncover only highly significant variants which have already been reported in literature.

Results: In this work, we introduce a novel phylogeny-related parallelism score (PRPS), which measures whether a certain feature is correlated with the population structure of a set of samples. We demonstrate that PRPS can be used, in combination with SVM- and random forest-based models, to reduce the number of features in the analysis, while simultaneously increasing models’ performance. We applied our pipeline to publicly available AMR data from PATRIC database for Mycobacterium tuberculosis against six common antibiotics.

Conclusions: Using our pipeline, we re-discovered known resistance-associated mutations as well as new candidate mutations which can be related to resistance and not previously reported in the literature. We demonstrated that taking into account phylogenetic relationships not only improves the model performance, but also yields more biologically relevant predicted most contributing resistance markers.},
  author       = {Yurtseven, Alper and Buyanova, Sofia and Agrawal, Amay Ajaykumar and Bochkareva, Olga and Kalinina, Olga V.},
  issn         = {1471-2180},
  journal      = {BMC Microbiology},
  number       = {1},
  publisher    = {Springer Nature},
  title        = {{Machine learning and phylogenetic analysis allow for predicting antibiotic resistance in M. tuberculosis}},
  doi          = {10.1186/s12866-023-03147-7},
  volume       = {23},
  year         = {2023},
}

@article{12758,
  author    = {Pak, Marina A. and Markhieva, Karina A. and Novikova, Mariia S. and Petrov, Dmitry S. and Vorobyev, Ilya S. and Maksimova, Ekaterina and Kondrashov, Fyodor and Ivankov, Dmitry N.},
  title     = {{Using AlphaFold to predict the impact of single mutations on protein stability and function}},
  journal   = {PLoS ONE},
  volume    = {18},
  number    = {3},
  year      = {2023},
  publisher = {Public Library of Science},
  issn      = {1932-6203},
  doi       = {10.1371/journal.pone.0282689},
  abstract  = {AlphaFold changed the field of structural biology by achieving three-dimensional (3D) structure prediction from protein sequence at experimental quality. The astounding success even led to claims that the protein folding problem is “solved”. However, protein folding problem is more than just structure prediction from sequence. Presently, it is unknown if the AlphaFold-triggered revolution could help to solve other problems related to protein folding. Here we assay the ability of AlphaFold to predict the impact of single mutations on protein stability (ΔΔG) and function. To study the question we extracted the pLDDT and <pLDDT> metrics from AlphaFold predictions before and after single mutation in a protein and correlated the predicted change with the experimentally known ΔΔG values. Additionally, we correlated the same AlphaFold pLDDT metrics with the impact of a single mutation on structure using a large scale dataset of single mutations in GFP with the experimentally assayed levels of fluorescence. We found a very weak or no correlation between AlphaFold output metrics and change of protein stability or fluorescence. Our results imply that AlphaFold may not be immediately applied to other problems or applications in protein folding.},
}

@article{13164,
  abstract     = {Molecular compatibility between gametes is a prerequisite for successful fertilization. As long as a sperm and egg can recognize and bind each other via their surface proteins, gamete fusion may occur even between members of separate species, resulting in hybrids that can impact speciation. The egg membrane protein Bouncer confers species specificity to gamete interactions between medaka and zebrafish, preventing their cross-fertilization. Here, we leverage this specificity to uncover distinct amino acid residues and N-glycosylation patterns that differentially influence the function of medaka and zebrafish Bouncer and contribute to cross-species incompatibility. Curiously, in contrast to the specificity observed for medaka and zebrafish Bouncer, seahorse and fugu Bouncer are compatible with both zebrafish and medaka sperm, in line with the pervasive purifying selection that dominates Bouncer’s evolution. The Bouncer-sperm interaction is therefore the product of seemingly opposing evolutionary forces that, for some species, restrict fertilization to closely related fish, and for others, allow broad gamete compatibility that enables hybridization.},
  author       = {Gert, Krista R. B. and Panser, Karin and Surm, Joachim and Steinmetz, Benjamin S. and Schleiffer, Alexander and Jovine, Luca and Moran, Yehu and Kondrashov, Fyodor and Pauli, Andrea},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {{Divergent molecular signatures in fish Bouncer proteins define cross-fertilization boundaries}},
  doi          = {10.1038/s41467-023-39317-4},
  volume       = {14},
  year         = {2023},
}

@article{11587,
  abstract     = {Background: Accurate and comprehensive annotation of transcript sequences is essential for transcript quantification and differential gene and transcript expression analysis. Single-molecule long-read sequencing technologies provide improved integrity of transcript structures including alternative splicing, and transcription start and polyadenylation sites. However, accuracy is significantly affected by sequencing errors, mRNA degradation, or incomplete cDNA synthesis.
Results: We present a new and comprehensive Arabidopsis thaliana Reference Transcript Dataset 3 (AtRTD3). AtRTD3 contains over 169,000 transcripts—twice that of the best current Arabidopsis transcriptome and including over 1500 novel genes. Seventy-eight percent of transcripts are from Iso-seq with accurately defined splice junctions and transcription start and end sites. We develop novel methods to determine splice junctions and transcription start and end sites accurately. Mismatch profiles around splice junctions provide a powerful feature to distinguish correct splice junctions and remove false splice junctions. Stratified approaches identify high-confidence transcription start and end sites and remove fragmentary transcripts due to degradation. AtRTD3 is a major improvement over existing transcriptomes as demonstrated by analysis of an Arabidopsis cold response RNA-seq time-series. AtRTD3 provides higher resolution of transcript expression profiling and identifies cold-induced differential transcription start and polyadenylation site usage.
Conclusions: AtRTD3 is the most comprehensive Arabidopsis transcriptome currently. It improves the precision of differential gene and transcript expression, differential alternative splicing, and transcription start/end site usage analysis from RNA-seq data. The novel methods for identifying accurate splice junctions and transcription start/end sites are widely applicable and will improve single-molecule sequencing analysis from any species.},
  author       = {Zhang, Runxuan and Kuo, Richard and Coulter, Max and Calixto, Cristiane P. G. and Entizne, Juan Carlos and Guo, Wenbin and Marquez, Yamile and Milne, Linda and Riegler, Stefan and Matsui, Akihiro and Tanaka, Maho and Harvey, Sarah and Gao, Yubang and Wießner-Kroh, Theresa and Paniagua, Alejandro and Crespi, Martin and Denby, Katherine and Ben Hur, Asa and Huq, Enamul and Jantsch, Michael and Jarmolowski, Artur and Koester, Tino and Laubinger, Sascha and Li, Qingshun Quinn and Gu, Lianfeng and Seki, Motoaki and Staiger, Dorothee and Sunkar, Ramanjulu and Szweykowska-Kulinska, Zofia and Tu, Shih Long and Wachter, Andreas and Waugh, Robbie and Xiong, Liming and Zhang, Xiao Ning and Conesa, Ana and Reddy, Anireddy S. N. and Barta, Andrea and Kalyna, Maria and Brown, John W. S.},
  issn         = {1474-760X},
  journal      = {Genome Biology},
  publisher    = {BioMed Central},
  title        = {{A high-resolution single-molecule sequencing-based Arabidopsis transcriptome using novel methods of Iso-seq analysis}},
  doi          = {10.1186/s13059-022-02711-0},
  volume       = {23},
  year         = {2022},
}

@article{12116,
  author    = {Chhugani, Karishma and Frolova, Alina and Salyha, Yuriy and Fiscutean, Andrada and Zlenko, Oksana and Reinsone, Sanita and Wolfsberger, Walter W. and Ivashchenko, Oleksandra V. and Maci, Megi and Dziuba, Dmytro and Parkhomenko, Andrii and Bortz, Eric and Kondrashov, Fyodor and Łabaj, Paweł P. and Romero, Veronika and Hlávka, Jakub and Oleksyk, Taras K. and Mangul, Serghei},
  title     = {{Remote opportunities for scholars in Ukraine}},
  journal   = {Science},
  volume    = {378},
  number    = {6626},
  pages     = {1285--1286},
  year      = {2022},
  publisher = {American Association for the Advancement of Science},
  issn      = {1095-9203},
  doi       = {10.1126/science.adg0797},
  abstract  = {Russia’s unprovoked attack on Ukraine has destroyed civilian infrastructure, including universities, research centers, and other academic infrastructure (1). Many Ukrainian scholars and researchers remain in Ukraine, and their work has suffered from major setbacks (2–4). We call on international scientists and institutions to support them.},
}

@article{12131,
  author    = {Byazrova, Maria G. and Astakhova, Ekaterina A. and Minnegalieva, Aygul and Sukhova, Maria M. and Mikhailov, Artem A. and Prilipov, Alexey G. and Gorchakov, Andrey A. and Filatov, Alexander V.},
  title     = {{Anti-Ad26 humoral immunity does not compromise SARS-COV-2 neutralizing antibody responses following Gam-COVID-Vac booster vaccination}},
  journal   = {npj Vaccines},
  volume    = {7},
  year      = {2022},
  publisher = {Springer Nature},
  issn      = {2059-0105},
  doi       = {10.1038/s41541-022-00566-x},
  keywords  = {Pharmacology (medical), Infectious Diseases, Pharmacology, Immunology, SARS-COV-2, COVID},
  abstract  = {Replication-incompetent adenoviral vectors have been extensively used as a platform for vaccine design, with at least four anti-COVID-19 vaccines authorized to date. These vaccines elicit neutralizing antibody responses directed against SARS-CoV-2 Spike protein and confer significant level of protection against SARS-CoV-2 infection. Immunization with adenovirus-vectored vaccines is known to be accompanied by the production of anti-vector antibodies, which may translate into reduced efficacy of booster or repeated rounds of revaccination. Here, we used blood samples from patients who received an adenovirus-based Gam-COVID-Vac vaccine to address the question of whether anti-vector antibodies may influence the magnitude of SARS-CoV-2-specific humoral response after booster vaccination. We observed that rAd26-based prime vaccination with Gam-COVID-Vac induced the development of Ad26-neutralizing antibodies, which persisted in circulation for at least 9 months. Our analysis further indicates that high pre-boost Ad26 neutralizing antibody titers do not appear to affect the humoral immunogenicity of the Gam-COVID-Vac boost. The titers of anti-SARS-CoV-2 RBD IgGs and antibodies, which neutralized both the wild type and the circulating variants of concern of SARS-CoV-2 such as Delta and Omicron, were independent of the pre-boost levels of Ad26-neutralizing antibodies. Thus, our results support the development of repeated immunization schedule with adenovirus-based COVID-19 vaccines.},
}

@article{12173,
  abstract     = {With increasing urbanization and industrialization, the prevalence of inflammatory bowel diseases (IBDs) has steadily been rising over the past two decades. IBD involves flares of gastrointestinal (GI) inflammation accompanied by microbiota perturbations. However, microbial mechanisms that trigger such flares remain elusive. Here, we analyzed the association of the emerging pathogen atypical enteropathogenic E. coli (aEPEC) with IBD disease activity. The presence of diarrheagenic E. coli was assessed in stool samples from 630 IBD patients and 234 age- and sex-matched controls without GI symptoms. Microbiota was analyzed with 16S ribosomal RNA gene amplicon sequencing, and 57 clinical aEPEC isolates were subjected to whole-genome sequencing and in vitro pathogenicity experiments including biofilm formation, epithelial barrier function and the ability to induce pro-inflammatory signaling. The presence of aEPEC correlated with laboratory, clinical and endoscopic disease activity in ulcerative colitis (UC), as well as microbiota dysbiosis. In vitro, aEPEC strains induce epithelial p21-activated kinases, disrupt the epithelial barrier and display potent biofilm formation. The effector proteins espV and espG2 distinguish aEPEC cultured from UC and Crohn’s disease patients, respectively. EspV-positive aEPEC harbor more virulence factors and have a higher pro-inflammatory potential, which is counteracted by 5-ASA. aEPEC may tip a fragile immune–microbiota homeostasis and thereby contribute to flares in UC. aEPEC isolates from UC patients display properties to disrupt the epithelial barrier and to induce pro-inflammatory signaling in vitro.},
  author       = {Baumgartner, Maximilian and Zirnbauer, Rebecca and Schlager, Sabine and Mertens, Daniel and Gasche, Nikolaus and Sladek, Barbara and Herbold, Craig and Bochkareva, Olga and Emelianenko, Vera and Vogelsang, Harald and Lang, Michaela and Klotz, Anton and Moik, Birgit and Makristathis, Athanasios and Berry, David and Dabsch, Stefanie and Khare, Vineeta and Gasche, Christoph},
  issn         = {1949-0984},
  journal      = {Gut Microbes},
  keywords     = {Infectious Diseases, Microbiology (medical), Gastroenterology, Microbiology},
  number       = {1},
  publisher    = {Taylor \& Francis},
  title        = {{Atypical enteropathogenic E. coli are associated with disease activity in ulcerative colitis}},
  doi          = {10.1080/19490976.2022.2143218},
  volume       = {14},
  year         = {2022},
}

@article{10927,
  author    = {Zabelkin, Alexey and Yakovleva, Yulia and Bochkareva, Olga and Alexeev, Nikita},
  title     = {{PaReBrick: PArallel REarrangements and BReaks identification toolkit}},
  journal   = {Bioinformatics},
  volume    = {38},
  number    = {2},
  pages     = {357--363},
  year      = {2022},
  publisher = {Oxford University Press},
  issn      = {1460-2059},
  doi       = {10.1093/bioinformatics/btab691},
  abstract  = {Motivation
High plasticity of bacterial genomes is provided by numerous mechanisms including horizontal gene transfer and recombination via numerous flanking repeats. Genome rearrangements such as inversions, deletions, insertions and duplications may independently occur in different strains, providing parallel adaptation or phenotypic diversity. Specifically, such rearrangements might be responsible for virulence, antibiotic resistance and antigenic variation. However, identification of such events requires laborious manual inspection and verification of phyletic pattern consistency.
Results
Here, we define the term ‘parallel rearrangements’ as events that occur independently in phylogenetically distant bacterial strains and present a formalization of the problem of parallel rearrangements calling. We implement an algorithmic solution for the identification of parallel rearrangements in bacterial populations as a tool PaReBrick. The tool takes a collection of strains represented as a sequence of oriented synteny blocks and a phylogenetic tree as input data. It identifies rearrangements, tests them for consistency with a tree, and sorts the events by their parallelism score. The tool provides diagrams of the neighbors for each block of interest, allowing the detection of horizontally transferred blocks or their extra copies and the inversions in which copied blocks are involved. We demonstrated PaReBrick’s efficiency and accuracy and showed its potential to detect genome rearrangements responsible for pathogenicity and adaptation in bacterial genomes.},
}

@article{11187,
  author       = {Knyazev, Sergey and Chhugani, Karishma and Sarwal, Varuni and Ayyala, Ram and Singh, Harman and Karthikeyan, Smruthi and Deshpande, Dhrithi and Baykal, Pelin Icer and Comarova, Zoia and Lu, Angela and Porozov, Yuri and Vasylyeva, Tetyana I. and Wertheim, Joel O. and Tierney, Braden T. and Chiu, Charles Y. and Sun, Ren and Wu, Aiping and Abedalthagafi, Malak S. and Pak, Victoria M. and Nagaraj, Shivashankar H. and Smith, Adam L. and Skums, Pavel and Pasaniuc, Bogdan and Komissarov, Andrey and Mason, Christopher E. and Bortz, Eric and Lemey, Philippe and Kondrashov, Fyodor and Beerenwinkel, Niko and Lam, Tommy Tsan Yuk and Wu, Nicholas C. and Zelikovsky, Alex and Knight, Rob and Crandall, Keith A. and Mangul, Serghei},
  title        = {{Unlocking capacities of genomics for the COVID-19 response and future pandemics}},
  journal      = {Nature Methods},
  volume       = {19},
  number       = {4},
  pages        = {374--380},
  year         = {2022},
  publisher    = {Springer Nature},
  issn         = {1548-7105},
  doi          = {10.1038/s41592-022-01444-z},
  abstract     = {During the COVID-19 pandemic, genomics and bioinformatics have emerged as essential public health tools. The genomic data acquired using these methods have supported the global health response, facilitated the development of testing methods and allowed the timely tracking of novel SARS-CoV-2 variants. Yet the virtually unlimited potential for rapid generation and analysis of genomic data is also coupled with unique technical, scientific and organizational challenges. Here, we discuss the application of genomic and computational methods for efficient data-driven COVID-19 response, the advantages of the democratization of viral sequencing around the world and the challenges associated with viral genome data collection and processing.},
}

@article{11448,
  abstract     = {Studies of protein fitness landscapes reveal biophysical constraints guiding protein evolution and empower prediction of functional proteins. However, generalisation of these findings is limited due to scarceness of systematic data on fitness landscapes of proteins with a defined evolutionary relationship. We characterized the fitness peaks of four orthologous fluorescent proteins with a broad range of sequence divergence. While two of the four studied fitness peaks were sharp, the other two were considerably flatter, being almost entirely free of epistatic interactions. Mutationally robust proteins, characterized by a flat fitness peak, were not optimal templates for machine-learning-driven protein design – instead, predictions were more accurate for fragile proteins with epistatic landscapes. Our work paves insights for practical application of fitness landscape heterogeneity in protein engineering.},
  author       = {Gonzalez Somermeyer, Louisa and Fleiss, Aubin and Mishin, Alexander S. and Bozhanova, Nina G. and Igolkina, Anna A. and Meiler, Jens and Alaball Pujol, Maria-Elisenda and Putintseva, Ekaterina V. and Sarkisyan, Karen S. and Kondrashov, Fyodor},
  issn         = {2050-084X},
  journal      = {eLife},
  keywords     = {General Immunology and Microbiology, General Biochemistry, Genetics and Molecular Biology, General Medicine, General Neuroscience},
  publisher    = {eLife Sciences Publications},
  title        = {{Heterogeneity of the GFP fitness landscape and data-driven protein design}},
  doi          = {10.7554/elife.75842},
  volume       = {11},
  year         = {2022},
}

@article{11344,
  abstract     = {Until recently, Shigella and enteroinvasive Escherichia coli were thought to be primate-restricted pathogens. The base of their pathogenicity is the type 3 secretion system (T3SS) encoded by the pINV virulence plasmid, which facilitates host cell invasion and subsequent proliferation. A large family of T3SS effectors, E3 ubiquitin-ligases encoded by the ipaH genes, have a key role in the Shigella pathogenicity through the modulation of cellular ubiquitination that degrades host proteins. However, recent genomic studies identified ipaH genes in the genomes of Escherichia marmotae, a potential marmot pathogen, and an E. coli extracted from fecal samples of bovine calves, suggesting that non-human hosts may also be infected by these strains, potentially pathogenic to humans. We performed a comparative genomic study of the functional repertoires in the ipaH gene family in Shigella and enteroinvasive Escherichia from human and predicted non-human hosts. We found that fewer than half of Shigella genomes had a complete set of ipaH genes, with frequent gene losses and duplications that were not consistent with the species tree and nomenclature. Non-human host IpaH proteins had a diverse set of substrate-binding domains and, in contrast to the Shigella proteins, two variants of the NEL C-terminal domain. Inconsistencies between strains phylogeny and composition of effectors indicate horizontal gene transfer between E. coli adapted to different hosts. These results provide a framework for understanding of ipaH-mediated host-pathogens interactions and suggest a need for a genomic study of fecal samples from diseased animals.},
  author       = {Dranenko, Natalia O. and Tutukina, M. N. and Gelfand, Mikhail S. and Kondrashov, Fyodor and Bochkareva, Olga},
  issn         = {2045-2322},
  journal      = {Scientific Reports},
  publisher    = {Springer Nature},
  title        = {{Chromosome-encoded IpaH ubiquitin ligases indicate non-human enteroinvasive Escherichia}},
  doi          = {10.1038/s41598-022-10827-3},
  volume       = {12},
  year         = {2022},
}

@article{9255,
  abstract     = {Our ability to trust that a random number is truly random is essential for fields as diverse as cryptography and fundamental tests of quantum mechanics. Existing solutions both come with drawbacks—device-independent quantum random number generators (QRNGs) are highly impractical and standard semi-device-independent QRNGs are limited to a specific physical implementation and level of trust. Here we propose a framework for semi-device-independent randomness certification, using a source of trusted vacuum in the form of a signal shutter. It employs a flexible set of assumptions and levels of trust, allowing it to be applied in a wide range of physical scenarios involving both quantum and classical entropy sources. We experimentally demonstrate our protocol with a photonic setup and generate secure random bits under three different assumptions with varying degrees of security and resulting data rates.},
  author       = {Pivoluska, Matej and Plesch, Martin and Farkas, Máté and Ruzickova, Natalia and Flegel, Clara and Herrera Valencia, Natalia and McCutcheon, Will and Malik, Mehul and Aguilar, Edgar A.},
  issn         = {2056-6387},
  journal      = {npj Quantum Information},
  publisher    = {Springer Nature},
  title        = {{Semi-device-independent random number generation with flexible assumptions}},
  doi          = {10.1038/s41534-021-00387-1},
  volume       = {7},
  year         = {2021},
}

@article{9380,
  abstract     = {Shigella are pathogens originating within the Escherichia lineage but frequently classified as a separate genus. Shigella genomes contain numerous insertion sequences (ISs) that lead to pseudogenisation of affected genes and an increase of non-homologous recombination. Here, we study 414 genomes of E. coli and Shigella strains to assess the contribution of genomic rearrangements to Shigella evolution. We found that Shigella experienced exceptionally high rates of intragenomic rearrangements and had a decreased rate of homologous recombination compared to pathogenic and non-pathogenic E. coli. The high rearrangement rate resulted in independent disruption of syntenic regions and parallel rearrangements in different Shigella lineages. Specifically, we identified two types of chromosomally encoded E3 ubiquitin-protein ligases acquired independently by all Shigella strains that also showed a high level of sequence conservation in the promoter and further in the 5′-intergenic region. In the only available enteroinvasive E. coli (EIEC) strain, which is a pathogenic E. coli with a phenotype intermediate between Shigella and non-pathogenic E. coli, we found a rate of genome rearrangements comparable to those in other E. coli and no functional copies of the two Shigella-specific E3 ubiquitin ligases. These data indicate that the accumulation of ISs influenced many aspects of genome evolution and played an important role in the evolution of intracellular pathogens. Our research demonstrates the power of comparative genomics-based on synteny block composition and an important role of non-coding regions in the evolution of genomic islands.},
  author       = {Seferbekova, Zaira and Zabelkin, Alexey and Yakovleva, Yulia and Afasizhev, Robert and Dranenko, Natalia O. and Alexeev, Nikita and Gelfand, Mikhail S. and Bochkareva, Olga},
  issn         = {1664-302X},
  journal      = {Frontiers in Microbiology},
  publisher    = {Frontiers},
  title        = {{High rates of genome rearrangements and pathogenicity of Shigella spp.}},
  doi          = {10.3389/fmicb.2021.628622},
  volume       = {12},
  year         = {2021},
}

