% PhD thesis (Pal, 2025). `school` repeats the `publisher` value because
% classic BibTeX styles require `school` for thesis entries.
@phdthesis{20694,
  abstract     = {Understanding the mechanisms underlying speciation is a central aim of evolutionary biology.
A persistent challenge in the field is to identify loci that contribute to reproductive isolation,
while disentangling signals of selection from demography, linkage and intrinsic genomic
features. Traditional population genomic approaches that rely on site-based statistics in
arbitrary fixed windows face inherent limitations, as they conflate historical and
contemporary processes of divergence and overlook haplotype structure. Recent advances in
whole-genome sequencing and methods to infer ancestral recombination graphs (ARGs) now
offer the opportunity to study genealogical relationships explicitly, revealing how lineages
coalesce and recombine through time. By directly analysing haplotype clustering by species
or phenotype and their patterns of coalescence, ARG-based methods show promise for
diagnosing sweeps, identifying barrier loci maintained under divergent selection amid gene
flow, and tracing their evolutionary history.
In this thesis, I explore the utility of genealogical approaches for studying species
divergence. In chapter 2, I propose a conceptual framework for defining haplotype blocks
through the structure of the ARG, using simulations and empirical data to highlight how
genealogical processes generate rich and often overlooked haplotypic patterns.
In chapter 3, I examine the genomic basis of a key evolutionary innovation in marine
snails Littorina. These snails offer a unique opportunity to study an innovation because they
include a very recent transition from egg-laying to live bearing, yet snails with the different
reproductive modes are not reciprocally monophyletic. I exploited this by using topology
clustering in ARG-derived local genealogical trees to pinpoint narrow genomic regions or
haplotype blocks that carry swept alleles, thus revealing that the transition from egg-laying
to live-bearing involves multiple, live-bearer-specific sweeps.
Chapter 4 establishes a population-scale, phased genomic resource for Antirrhinum
majus, using cost-effective haplotagging, then optimizes imputation from low-coverage data
against high-accuracy KASP sequencing to maximize sequence completeness with modest
accuracy trade-offs against a traditional short-read sequence pipeline. A hybrid phasing
strategy combines molecular phasing with statistical phasing to generate phased whole
genome sequences of 1084 Antirrhinum individuals at a fraction of long-read sequencing
costs.
In chapter 5, I analyse hybridising populations from two replicate hybrid zones to find
a parallel genetic basis of flower colour, amidst the noise in genomic differentiation landscape
driven by variation in demographic history. While outlier genome scans of FST failed to dissect
the causes of differentiation, ARG-based topology clustering revealed a reuse of colour
associated haplotypes across hybrid zones. In addition to the biological insight, this chapter
also presents a comparison of the latest ARG inference tools, showing that signals of
topological clustering qualitatively agree between methods, despite differences in the tree
sequences.
Next, in chapter 6, by leveraging ~1000 individuals in one of the hybrid zones, I
integrated genome-wide association studies of floral pigmentation with genealogical
inference, to test for additional colour loci, and confirm the effect of previously described loci.
This work demonstrates that flower colour variation is driven by a small number of large effect
loci, while also hinting at the presence of a new candidate regulatory factor.
Finally in chapter 7, in a preliminary analysis, I begin to dissect the genomic island of
speciation around Rosea/Eluta to understand its evolutionary origins. My results show that it
consists of 5 highly divergent loci, each of which is associated with flower colour. Using
patterns of coalescence in genealogical trees, I find evidence of staggered selective sweeps
and a persistent localized barrier to gene flow within an otherwise permeable genome.
Together, these chapters add to the increasing pool of studies using genealogical
approaches to complement and extend site-based statistics to use haplotype structures in
speciation research. By tracking haplotypes directly and connecting genealogical clustering to
population processes, ARG-based inference promises to provide new insights into how local
selective pressures, demographic history, and long-term barriers interact to shape the
genomic architecture of divergence. By underscoring the value of ARGs in revealing the fine-scale origins and maintenance of biodiversity, this thesis presents cautious optimism about
the benefits of using genealogical inference to learn more than what site-based statistics
could tell us.},
  author       = {Pal, Arka},
  issn         = {2663-337X},
  pages        = {268},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Using genealogies to study the genomic basis of species divergence}},
  doi          = {10.15479/AT-ISTA-20694},
  year         = {2025},
}

% Journal article (Pal et al., 2025, Molecular Ecology 34(22)).
% Bibliographic fields come first; the long abstract is kept last.
@article{20190,
  author       = {Pal, Arka and Shipilina, Daria and Le Moan, Alan and Mcnairn, Adrian J. and Grenier, Jennifer K. and Kucka, Marek and Coop, Graham and Chan, Yingguang Frank and Barton, Nicholas H and Field, David and Stankowski, Sean},
  title        = {{Genealogical analysis of replicate flower colour hybrid zones in Antirrhinum}},
  journal      = {Molecular Ecology},
  volume       = {34},
  number       = {22},
  year         = {2025},
  publisher    = {Wiley},
  issn         = {1365-294X},
  doi          = {10.1111/mec.70067},
  abstract     = {A major goal of speciation research is identifying loci that underpin barriers to gene flow. Population genomics takes a ‘bottom-up’ approach, scanning the genome for molecular signatures of processes that drive or maintain divergence. However, interpreting the ‘genomic landscape’ of speciation is complicated, because genome scans conflate multiple processes, most of which are not informative about gene flow. However, studying replicated population contrasts, including multiple incidences of secondary contact, can strengthen inferences. In this paper, we use linked-read sequencing (haplotagging), FST scans and genealogical methods to characterise the genomic landscape associated with replicate hybrid zone formation. We studied two flower colour varieties of the common snapdragon, Antirrhinum majus subspecies majus, that form secondary hybrid zones in multiple independent valleys in the Pyrenees. Consistent with past work, we found very low differentiation at one well-studied zone (Planoles). However, at a second zone (Avellanet), we found stronger differentiation and greater heterogeneity, which we argue is due to differences in the amount of introgression following secondary contact. Topology weighting of genealogical trees identified loci where haplotype diversity was associated with the two snapdragon varieties. Two of the strongest associations were at previously identified flower colour loci: Flavia, that affects yellow pigmentation, and Rosea/Eluta, two linked loci that affect magenta pigmentation. Preliminary analysis of coalescence times provides additional evidence for selective sweeps at these loci and barriers to gene flow. Our study highlights the impact of demographic history on the differentiation landscape, emphasising the need to distinguish between historical divergence and recent introgression.},
}

% Journal article (Pal, Joshi and Thaker, 2024, Journal of Experimental Biology 227(1)).
% The abstract is plain text; XML markup left over from the publisher feed has been removed.
@article{14850,
  abstract     = {Elaborate sexual signals are thought to have evolved and be maintained to serve as honest indicators of signaller quality. One measure of quality is health, which can be affected by parasite infection. Cnemaspis mysoriensis is a diurnal gecko that is often infested with ectoparasites in the wild, and males of this species express visual (coloured gular patches) and chemical (femoral gland secretions) traits that receivers could assess during social interactions. In this paper, we tested whether ectoparasites affect individual health, and whether signal quality is an indicator of ectoparasite levels. In wild lizards, we found that ectoparasite level was negatively correlated with body condition in both sexes. Moreover, some characteristics of both visual and chemical traits in males were strongly associated with ectoparasite levels. Specifically, males with higher ectoparasite levels had yellow gular patches with lower brightness and chroma, and chemical secretions with a lower proportion of aromatic compounds. We then determined whether ectoparasite levels in males influence female behaviour. Using sequential choice trials, wherein females were provided with either the visual or the chemical signals of wild-caught males that varied in ectoparasite level, we found that only chemical secretions evoked an elevated female response towards less parasitised males. Simultaneous choice trials in which females were exposed to the chemical secretions from males that varied in parasite level further confirmed a preference for males with lower parasites loads. Overall, we find that although health (body condition) or ectoparasite load can be honestly advertised through multiple modalities, the parasite-mediated female response is exclusively driven by chemical signals.},
  author       = {Pal, Arka and Joshi, Mihir and Thaker, Maria},
  issn         = {0022-0949},
  journal      = {Journal of Experimental Biology},
  keywords     = {Insect Science, Molecular Biology, Animal Science and Zoology, Aquatic Science, Physiology, Ecology, Evolution, Behavior and Systematics},
  number       = {1},
  publisher    = {The Company of Biologists},
  title        = {{Too much information? Males convey parasite levels using more signal modalities than females utilise}},
  doi          = {10.1242/jeb.246217},
  volume       = {227},
  year         = {2024},
}

% Journal article (Stankowski et al., 2024, Science 383(6678)).
% Bibliographic fields come first; the long abstract is kept last.
@article{14796,
  author       = {Stankowski, Sean and Zagrodzka, Zuzanna B. and Garlovsky, Martin D. and Pal, Arka and Shipilina, Daria and Garcia Castillo, Diego Fernando and Lifchitz, Hila and Le Moan, Alan and Leder, Erica and Reeve, James and Johannesson, Kerstin and Westram, Anja M and Butlin, Roger K.},
  title        = {{The genetic basis of a recent transition to live-bearing in marine snails}},
  journal      = {Science},
  volume       = {383},
  number       = {6678},
  pages        = {114--119},
  year         = {2024},
  publisher    = {American Association for the Advancement of Science},
  issn         = {1095-9203},
  doi          = {10.1126/science.adi2982},
  abstract     = {Key innovations are fundamental to biological diversification, but their genetic basis is poorly understood. A recent transition from egg-laying to live-bearing in marine snails (Littorina spp.) provides the opportunity to study the genetic architecture of an innovation that has evolved repeatedly across animals. Individuals do not cluster by reproductive mode in a genome-wide phylogeny, but local genealogical analysis revealed numerous small genomic regions where all live-bearers carry the same core haplotype. Candidate regions show evidence for live-bearer–specific positive selection and are enriched for genes that are differentially expressed between egg-laying and live-bearing reproductive systems. Ages of selective sweeps suggest that live-bearer–specific alleles accumulated over more than 200,000 generations. Our results suggest that new functions evolve through the recruitment of many alleles rather than in a single evolutionary step.},
}

% Journal article (Shipilina et al., 2023, Molecular Ecology 32(6)).
% Bibliographic fields come first; keywords and the long abstract are kept last.
@article{12159,
  author       = {Shipilina, Daria and Pal, Arka and Stankowski, Sean and Chan, Yingguang Frank and Barton, Nicholas H},
  title        = {{On the origin and structure of haplotype blocks}},
  journal      = {Molecular Ecology},
  volume       = {32},
  number       = {6},
  pages        = {1441--1457},
  year         = {2023},
  publisher    = {Wiley},
  issn         = {1365-294X},
  doi          = {10.1111/mec.16793},
  keywords     = {Genetics, Ecology, Evolution, Behavior and Systematics},
  abstract     = {The term “haplotype block” is commonly used in the developing field of haplotype-based inference methods. We argue that the term should be defined based on the structure of the Ancestral Recombination Graph (ARG), which contains complete information on the ancestry of a sample. We use simulated examples to demonstrate key features of the relationship between haplotype blocks and ancestral structure, emphasizing the stochasticity of the processes that generate them. Even the simplest cases of neutrality or of a “hard” selective sweep produce a rich structure, often missed by commonly used statistics. We highlight a number of novel methods for inferring haplotype structure, based on the full ARG, or on a sequence of trees, and illustrate how they can be used to define haplotype blocks using an empirical data set. While the advent of new, computationally efficient methods makes it possible to apply these concepts broadly, they (and additional new methods) could benefit from adding features to explore haplotype blocks, as we define them. Understanding and applying the concept of the haplotype block will be essential to fully exploit long and linked-read sequencing technologies.},
}

% Journal article (Pal and Vicoso, 2015, Genome Biology and Evolution 7(12)).
% The page range uses the BibTeX double-hyphen form without surrounding spaces.
@article{1513,
  abstract     = {Insects of the order Hemiptera (true bugs) use a wide range of mechanisms of sex determination, including genetic sex determination, paternal genome elimination, and haplodiploidy. Genetic sex determination, the prevalent mode, is generally controlled by a pair of XY sex chromosomes or by an XX/X0 system, but different configurations that include additional sex chromosomes are also present. Although this diversity of sex determining systems has been extensively studied at the cytogenetic level, only the X chromosome of the model pea aphid Acyrthosiphon pisum has been analyzed at the genomic level, and little is known about X chromosome biology in the rest of the order.

In this study, we take advantage of published DNA- and RNA-seq data from three additional Hemiptera species to perform a comparative analysis of the gene content and expression of the X chromosome throughout this clade. We find that, despite showing evidence of dosage compensation, the X chromosomes of these species show female-biased expression, and a deficit of male-biased genes, in direct contrast to the pea aphid X. We further detect an excess of shared gene content between these very distant species, suggesting that despite the diversity of sex determining systems, the same chromosomal element is used as the X throughout a large portion of the order.},
  author       = {Pal, Arka and Vicoso, Beatriz},
  journal      = {Genome Biology and Evolution},
  number       = {12},
  pages        = {3259--3268},
  publisher    = {Oxford University Press},
  title        = {{The X chromosome of hemipteran insects: Conservation, dosage compensation and sex-biased expression}},
  doi          = {10.1093/gbe/evv215},
  volume       = {7},
  year         = {2015},
}

