@article{21484,
  author       = {Krätschmer, Ilse and Robinson, Matthew Richard},
  title        = {{A quantitative genetic model for indirect genetic effects and genomic imprinting under random and assortative mating}},
  journal      = {Genetics},
  year         = {2026},
  publisher    = {Oxford University Press},
  issn         = {1943-2631},
  doi          = {10.1093/genetics/iyag042},
  abstract     = {An individual's phenotype reflects a complex interplay of the direct effects of their DNA, epigenetic modifications of their DNA induced by their parents, and indirect effects of their parents' DNA. Here, we derive how the genetic variance within a population is changed under the influence of indirect maternal, paternal and parent-of-origin effects under random mating. We also consider indirect effects of a sibling, in particular how the genetic variance is altered when looking at the phenotypic difference between two siblings. The calculations are then extended to include assortative mating (AM), which alters the variance by inducing increased homozygosity and correlations within and across loci. AM likely leads to covariance of parental genetic effects, a measure of the similarity of parents in the indirect effects they have on their children. We propose that this assortment for parental characteristics, where biological parents create similar environments for their children, can create shared parental effects across traits and the appearance of cross-trait AM. Our theory shows how the resemblance among relatives increases under both AM, indirect and parent-of-origin effects. When our model is used to predict correlations among relatives in human height, we find that explaining the patterns observed in real data requires both indirect genetic effects and assortative mating. The degree to which direct, indirect and epigenetic effects shape the phenotypic variance of complex traits remains an open question that requires large-scale family data to be resolved.},
}

@article{21488,
  author       = {Depope, Al and Bajzik, Jakub and Mondelli, Marco and Robinson, Matthew Richard},
  title        = {{Joint modeling of whole-genome sequencing data for human height via approximate message passing}},
  journal      = {Cell Genomics},
  year         = {2026},
  publisher    = {Elsevier},
  issn         = {2666-979X},
  doi          = {10.1016/j.xgen.2026.101162},
  abstract     = {Human height is a model for the genetic analysis of complex traits, and recent studies suggest the presence of thousands of common genetic variant associations and hundreds of low-frequency/rare variants. Here, we develop a new algorithmic paradigm based on approximate message passing (genomic vector approximate message passing [gVAMP]) for identifying DNA sequence variants associated with complex traits and common diseases in large-scale whole-genome sequencing (WGS) data. We show that gVAMP accurately localizes associations to variants with the correct frequency and position in the DNA, outperforming existing fine-mapping methods in selecting the appropriate genetic variants within WGS data. We then apply gVAMP to jointly model the relationship of tens of millions of WGS variants with human height in hundreds of thousands of UK Biobank individuals. We identify 59 rare variants and gene burden scores alongside many hundreds of DNA regions containing common variant associations and show that understanding the genetic basis of complex traits will require the joint analysis of hundreds of millions of variables measured on millions of people. The polygenic risk scores obtained from gVAMP have high accuracy (including a prediction accuracy of ∼46% for human height) and outperform current methods for downstream tasks such as mixed linear model association testing across 13 UK Biobank traits. In conclusion, gVAMP offers a scalable foundation for a wider range of analyses in WGS data.},
}

@article{20479,
  author       = {Shahzad, Zaigham and Hollwey, Elizabeth and Moore, Jonathan D. and Choi, Jaemyung and Cassin-Ross, Gaëlle and Rouached, Hatem and Robinson, Matthew Richard and Zilberman, Daniel},
  title        = {{Gene body methylation regulates gene expression and mediates phenotypic diversity in natural Arabidopsis populations}},
  journal      = {Nature Plants},
  volume       = {11},
  pages        = {2084--2099},
  year         = {2025},
  publisher    = {Springer Nature},
  issn         = {2055-0278},
  doi          = {10.1038/s41477-025-02108-4},
  abstract     = {Genetic variation is generally regarded as a prerequisite for evolution. In principle, epigenetic information inherited independently of DNA sequence can also enable evolution, but whether this occurs in natural populations is unknown. Here we show that single-nucleotide and epigenetic gene body DNA methylation (gbM) polymorphisms explain comparable amounts of expression variance in \textit{Arabidopsis thaliana} populations. We genetically demonstrate that gbM regulates transcription, and we identify and genetically validate many associations between gbM polymorphism and the variation of complex traits: fitness under heat and drought, flowering time and accumulation of diverse minerals. Epigenome-wide association studies pinpoint trait-relevant genes with greater precision than genetic association analyses, probably due to reduced linkage disequilibrium between gbM variants. Finally, we identify numerous associations between gbM epialleles and diverse environmental conditions in native habitats, suggesting that gbM facilitates adaptation. Overall, our results indicate that epigenetic methylation variation fundamentally shapes phenotypic diversity in a natural population.},
}

@article{20816,
  author       = {Robertson, Josephine A. and Bajzik, Jakub and Vernardis, Spyros and Chybowska, Aleksandra D. and McCartney, Daniel L. and Grauslys, Arturas and Mur, Jure and Smith, Hannah M. and Campbell, Archie and Drake, Camilla and Grant, Hannah and Pearce, Jamie and Russ, Tom C. and Adkin, Poppy and White, Matthew and Brigden, Charles and Messner, Christoph B. and Porteous, David J. and Hayward, Caroline and Cox, Simon R. and Zelezniak, Aleksej and Ralser, Markus and Robinson, Matthew Richard and Marioni, Riccardo E.},
  title        = {{Methylome-wide association studies and epigenetic biomarker development for 133 mass spectrometry-assessed circulating proteins in 14,671 Generation Scotland participants}},
  journal      = {Genome Biology},
  volume       = {26},
  year         = {2025},
  publisher    = {Springer Nature},
  issn         = {1474-760X},
  doi          = {10.1186/s13059-025-03892-0},
  abstract     = {Background: DNA methylation (DNAm) can regulate gene expression, and its genome-wide patterns (epigenetic scores or EpiScores) can act as biomarkers for complex traits. The relative stability of methylation profiles may enable better assessment of chronic exposures compared to single time-point protein measures. We present the first large-scale epigenetic study of the highly-abundant serum proteome measured via ultra-high throughput mass spectrometry in 14,671 samples from the Generation Scotland cohort. We further demonstrate the first large-scale comparison of protein EpiScores and their respective proteins as predictors of incident cardiovascular disease.

Results: Marginal epigenome-wide association models, adjusting for age, sex, measurement batch, estimated white cell proportions, BMI, smoking and methylation principal components, reveal 15,855 significant CpG – protein associations across 125 of 133 proteins PBonferroni < 2.71 × 10−10. Bayesian epigenome-wide association studies of the same 133 proteins reveal 697 CpG-Protein associations (posterior inclusion probability > 0.95). 112 protein EpiScores correlate significantly with their respective protein in a holdout test-set. Of these, sixteen associate significantly with incident all-cause cardiovascular disease (Nevents=191) compared to one measured protein.

Conclusions: We highlight a complex interplay between the blood-based methylome and proteome. Importantly, we show that protein EpiScores correlate with measured proteins and demonstrate that the, as-yet understudied, high-abundance proteome may yield clinically relevant biomarkers. The protein EpiScores demonstrate more significant associations with cardiovascular disease than directly measured proteins, suggesting their potential as clinical biomarkers for monitoring or predicting disease risk. We suggest that biomarker development could be enhanced by the consideration of protein EpiScores alongside measured proteins.},
}

@article{18754,
  author       = {Smith, Hannah M. and Ng, Hong Kiat and Moodie, Joanna E. and Gadd, Danni A. and McCartney, Daniel L. and Bernabeu, Elena and Campbell, Archie and Redmond, Paul and Taylor, Adele and Page, Danielle and Corley, Janie and Harris, Sarah E. and Tay, Darwin and Deary, Ian J. and Evans, Kathryn L. and Robinson, Matthew Richard and Chambers, John C. and Loh, Marie and Cox, Simon R. and Marioni, Riccardo E. and Hillary, Robert F.},
  title        = {{DNA methylation-based predictors of metabolic traits in Scottish and Singaporean cohorts}},
  journal      = {American Journal of Human Genetics},
  volume       = {112},
  number       = {1},
  pages        = {106--115},
  year         = {2025},
  publisher    = {Elsevier},
  issn         = {1537-6605},
  doi          = {10.1016/j.ajhg.2024.11.012},
  abstract     = {Exploring the molecular correlates of metabolic health measures may identify their shared and unique biological processes and pathways. Molecular proxies of these traits may also provide a more objective approach to their measurement. Here, DNA methylation (DNAm) data were used in epigenome-wide association studies (EWASs) and for training epigenetic scores (EpiScores) of six metabolic traits: body mass index (BMI), body fat percentage, waist-hip ratio, and blood-based measures of glucose, high-density lipoprotein cholesterol, and total cholesterol in >17,000 volunteers from the Generation Scotland (GS) cohort. We observed a maximum of 12,033 significant findings (p < 3.6 × 10−8) for BMI in a marginal linear regression EWAS. By contrast, a joint and conditional Bayesian penalized regression approach yielded 27 high-confidence associations with BMI. EpiScores trained in GS performed well in both Scottish and Singaporean test cohorts (Lothian Birth Cohort 1936 [LBC1936] and Health for Life in Singapore [HELIOS]). The EpiScores for BMI and total cholesterol performed best in HELIOS, explaining 20.8% and 7.1% of the variance in the measured traits, respectively. The corresponding results in LBC1936 were 14.4% and 3.2%, respectively. Differences were observed in HELIOS for body fat, where the EpiScore explained ∼9% of the variance in Chinese and Malay subgroups but ∼3% in the Indian subgroup. The EpiScores also correlated with cognitive function in LBC1936 (standardized βrange: 0.08–0.12, false discovery rate p [pFDR] < 0.05). Accounting for the correlation structure across the methylome can vastly affect the number of lead findings in EWASs. The EpiScores of metabolic traits are broadly applicable across populations and can reflect differences in cognition.},
}

@article{19023,
  author       = {Bernabeu, Elena and Chybowska, Aleksandra D. and Kresovich, Jacob K. and Suderman, Matthew and McCartney, Daniel L. and Hillary, Robert F. and Corley, Janie and Valdés-Hernández, Maria Del C. and Maniega, Susana Muñoz and Bastin, Mark E. and Wardlaw, Joanna M. and Xu, Zongli and Sandler, Dale P. and Campbell, Archie and Harris, Sarah E. and McIntosh, Andrew M. and Taylor, Jack A. and Yousefi, Paul and Cox, Simon R. and Evans, Kathryn L. and Robinson, Matthew Richard and Vallejos, Catalina A. and Marioni, Riccardo E.},
  title        = {{Blood-based epigenome-wide association study and prediction of alcohol consumption}},
  journal      = {Clinical Epigenetics},
  volume       = {17},
  year         = {2025},
  publisher    = {Springer Nature},
  issn         = {1868-7083},
  doi          = {10.1186/s13148-025-01818-y},
  abstract     = {Alcohol consumption is an important risk factor for multiple diseases. It is typically assessed via self-report, which is open to measurement error through recall bias. Instead, molecular data such as blood-based DNA methylation (DNAm) could be used to derive a more objective measure of alcohol consumption by incorporating information from cytosine-phosphate-guanine (CpG) sites known to be linked to the trait. Here, we explore the epigenetic architecture of self-reported weekly units of alcohol consumption in the Generation Scotland study. We first create a blood-based epigenetic score (EpiScore) of alcohol consumption using elastic net penalized linear regression. We explore the effect of pre-filtering for CpG features ahead of elastic net, as well as differential patterns by sex and by units consumed in the last week relative to an average week. The final EpiScore was trained on 16,717 individuals and tested in four external cohorts: the Lothian Birth Cohorts (LBC) of 1921 and 1936, the Sister Study, and the Avon Longitudinal Study of Parents and Children (total N across studies > 10,000). The maximum Pearson correlation between the EpiScore and self-reported alcohol consumption within cohort ranged from 0.41 to 0.53. In LBC1936, higher EpiScore levels had significant associations with poorer global brain imaging metrics, whereas self-reported alcohol consumption did not. Finally, we identified two novel CpG loci via a Bayesian penalized regression epigenome-wide association study of alcohol consumption. Together, these findings show how DNAm can objectively characterize patterns of alcohol consumption that associate with brain health, unlike self-reported estimates.},
}

@inproceedings{17147,
  author       = {Depope, Al and Mondelli, Marco and Robinson, Matthew Richard},
  title        = {{Inference of genetic effects via approximate message passing}},
  booktitle    = {2024 IEEE International Conference on Acoustics, Speech, and Signal Processing},
  pages        = {13151--13155},
  year         = {2024},
  publisher    = {IEEE},
  location     = {Seoul, Korea},
  isbn         = {9798350344851},
  issn         = {1520-6149},
  doi          = {10.1109/ICASSP48485.2024.10447198},
  abstract     = {Efficient utilization of large-scale biobank data is crucial for inferring the genetic basis of disease and predicting health outcomes from the DNA. Yet we lack efficient, accurate methods that scale to data where electronic health records are linked to whole genome sequence information. To address this issue, our paper develops a new algorithmic paradigm based on Approximate Message Passing (AMP), which is specifically tailored for genomic prediction and association testing. Our method yields comparable out-of-sample prediction accuracy to the state of the art on UK Biobank traits, whilst dramatically improving computational complexity, with a 8x-speed up in the run time. In addition, AMP theory provides a joint association testing framework, which outperforms the currently used REGENIE method, in roughly a third of the compute time. This first, truly large-scale application of the AMP framework lays the foundations for a far wider range of statistical analyses for hundreds of millions of variables measured on millions of people.},
}

@unpublished{18648,
  author       = {Machnik, Nick N. and Mahmoudi, Seyed Mahdi and Borczyk, Malgorzata and Krätschmer, Ilse and Bauer, Markus J. and Robinson, Matthew Richard},
  title        = {{Causal inference for multiple risk factors and diseases from genomics data}},
  booktitle    = {bioRxiv},
  note         = {Preprint, bioRxiv},
  year         = {2024},
  doi          = {10.1101/2023.12.06.570392},
  abstract     = {Statistical causal learning in genomics relies on the instrumental variable method of Mendelian Randomization (MR). Currently, an overwhelming number of MR studies purport to show causal relationships among a wide range of risk factors and outcomes. Here, we show that selecting instrument variables from genome-wide association study estimates leads to high false discovery rates for many MR approaches, which can be greatly reduced by employing a graphical inference approach which: (i) explicitly tests instrumental variable assumptions; (ii) distinguishes direct from indirect factors in very high-dimensional data; (iii) discriminates pleiotropic from trait-specific markers, controlling for LD genome-wide; (iv) accommodates rare variants and binary outcomes in a principled way; and (v) identifies potential unobserved latent confounding. For 17 traits and 8.4M variants recorded for 458,747 individuals in the UK Biobank, we show that standard MR analysis gives an abundance of findings that disappear under stringent assumption checks, with many relationships reflecting potential unmeasured confounding. This implies that mixtures of temporal precedence and potential for reverse-causality prohibit understanding the underlying nature of phenotypic and genetic correlations in biobank data. We propose that well-curated longitudinal records are likely needed and that our approach provides a first-step toward robust principled screening for potential causal links.},
}

@article{14258,
  author       = {Ojavee, Sven E. and Darrous, Liza and Patxot, Marion and Läll, Kristi and Fischer, Krista and Mägi, Reedik and Kutalik, Zoltan and Robinson, Matthew Richard},
  title        = {{Genetic insights into the age-specific biological mechanisms governing human ovarian aging}},
  journal      = {American Journal of Human Genetics},
  volume       = {110},
  number       = {9},
  pages        = {1549--1563},
  year         = {2023},
  publisher    = {Elsevier},
  issn         = {1537-6605},
  doi          = {10.1016/j.ajhg.2023.07.006},
  abstract     = {There is currently little evidence that the genetic basis of human phenotype varies significantly across the lifespan. However, time-to-event phenotypes are understudied and can be thought of as reflecting an underlying hazard, which is unlikely to be constant through life when values take a broad range. Here, we find that 74% of 245 genome-wide significant genetic associations with age at natural menopause (ANM) in the UK Biobank show a form of age-specific effect. Nineteen of these replicated discoveries are identified only by our modeling framework, which determines the time dependency of DNA-variant age-at-onset associations without a significant multiple-testing burden. Across the range of early to late menopause, we find evidence for significantly different underlying biological pathways, changes in the signs of genetic correlations of ANM to health indicators and outcomes, and differences in inferred causal relationships. We find that DNA damage response processes only act to shape ovarian reserve and depletion for women of early ANM. Genetically mediated delays in ANM were associated with increased relative risk of breast cancer and leiomyoma at all ages and with high cholesterol and heart failure for late-ANM women. These findings suggest that a better understanding of the age dependency of genetic risk factor relationships among health indicators and outcomes is achievable through appropriate statistical modeling of large-scale biobank data.},
}

@article{12719,
  author       = {Bernabeu, Elena and McCartney, Daniel L. and Gadd, Danni A. and Hillary, Robert F. and Lu, Ake T. and Murphy, Lee and Wrobel, Nicola and Campbell, Archie and Harris, Sarah E. and Liewald, David and Hayward, Caroline and Sudlow, Cathie and Cox, Simon R. and Evans, Kathryn L. and Horvath, Steve and McIntosh, Andrew M. and Robinson, Matthew Richard and Vallejos, Catalina A. and Marioni, Riccardo E.},
  title        = {{Refining epigenetic prediction of chronological and biological age}},
  journal      = {Genome Medicine},
  volume       = {15},
  year         = {2023},
  publisher    = {Springer Nature},
  issn         = {1756-994X},
  doi          = {10.1186/s13073-023-01161-y},
  abstract     = {Background
Epigenetic clocks can track both chronological age (cAge) and biological age (bAge). The latter is typically defined by physiological biomarkers and risk of adverse health outcomes, including all-cause mortality. As cohort sample sizes increase, estimates of cAge and bAge become more precise. Here, we aim to develop accurate epigenetic predictors of cAge and bAge, whilst improving our understanding of their epigenomic architecture.

Methods
First, we perform large-scale (N = 18,413) epigenome-wide association studies (EWAS) of chronological age and all-cause mortality. Next, to create a cAge predictor, we use methylation data from 24,674 participants from the Generation Scotland study, the Lothian Birth Cohorts (LBC) of 1921 and 1936, and 8 other cohorts with publicly available data. In addition, we train a predictor of time to all-cause mortality as a proxy for bAge using the Generation Scotland cohort (1214 observed deaths). For this purpose, we use epigenetic surrogates (EpiScores) for 109 plasma proteins and the 8 component parts of GrimAge, one of the current best epigenetic predictors of survival. We test this bAge predictor in four external cohorts (LBC1921, LBC1936, the Framingham Heart Study and the Women’s Health Initiative study).

Results
Through the inclusion of linear and non-linear age-CpG associations from the EWAS, feature pre-selection in advance of elastic net regression, and a leave-one-cohort-out (LOCO) cross-validation framework, we obtain cAge prediction with a median absolute error equal to 2.3 years. Our bAge predictor was found to slightly outperform GrimAge in terms of the strength of its association to survival (HRGrimAge = 1.47 [1.40, 1.54] with p = 1.08 × 10−52, and HRbAge = 1.52 [1.44, 1.59] with p = 2.20 × 10−60). Finally, we introduce MethylBrowsR, an online tool to visualise epigenome-wide CpG-age associations.

Conclusions
The integration of multiple large datasets, EpiScores, non-linear DNAm effects, and new approaches to feature selection has facilitated improvements to the blood-based epigenetic prediction of biological and chronological age.},
}

@article{11733,
  author       = {Orliac, Etienne J. and Trejo Banos, Daniel and Ojavee, Sven E. and Läll, Kristi and Mägi, Reedik and Visscher, Peter M. and Robinson, Matthew Richard},
  title        = {{Improving GWAS discovery and genomic prediction accuracy in biobank data}},
  journal      = {Proceedings of the National Academy of Sciences of the United States of America},
  volume       = {119},
  number       = {31},
  year         = {2022},
  publisher    = {National Academy of Sciences},
  issn         = {1091-6490},
  doi          = {10.1073/pnas.2121279119},
  abstract     = {Genetically informed, deep-phenotyped biobanks are an important research resource and it is imperative that the most powerful, versatile, and efficient analysis approaches are used. Here, we apply our recently developed Bayesian grouped mixture of regressions model (GMRM) in the UK and Estonian Biobanks and obtain the highest genomic prediction accuracy reported to date across 21 heritable traits. When compared to other approaches, GMRM accuracy was greater than annotation prediction models run in the LDAK or LDPred-funct software by 15% (SE 7%) and 14% (SE 2%), respectively, and was 18% (SE 3%) greater than a baseline BayesR model without single-nucleotide polymorphism (SNP) markers grouped into minor allele frequency–linkage disequilibrium (MAF-LD) annotation categories. For height, the prediction accuracy R2 was 47% in a UK Biobank holdout sample, which was 76% of the estimated h2SNP. We then extend our GMRM prediction model to provide mixed-linear model association (MLMA) SNP marker estimates for genome-wide association (GWAS) discovery, which increased the independent loci detected to 16,162 in unrelated UK Biobank individuals, compared to 10,550 from BoltLMM and 10,095 from Regenie, a 62 and 65% increase, respectively. The average χ2 value of the leading markers increased by 15.24 (SE 0.41) for every 1% increase in prediction accuracy gained over a baseline BayesR model across the traits. Thus, we show that modeling genetic associations accounting for MAF and LD differences among SNP markers, and incorporating prior knowledge of genomic function, is important for both genomic prediction and discovery in large-scale individual-level studies.},
}

@article{12142,
  author       = {Ojavee, Sven E. and Kutalik, Zoltan and Robinson, Matthew Richard},
  title        = {{Liability-scale heritability estimation for biobank studies of low-prevalence disease}},
  journal      = {American Journal of Human Genetics},
  volume       = {109},
  number       = {11},
  pages        = {2009--2017},
  year         = {2022},
  publisher    = {Elsevier},
  issn         = {0002-9297},
  keywords     = {Genetics (clinical), Genetics},
  doi          = {10.1016/j.ajhg.2022.09.011},
  abstract     = {Theory for liability-scale models of the underlying genetic basis of complex disease provides an important way to interpret, compare, and understand results generated from biological studies. In particular, through estimation of the liability-scale heritability (LSH), liability models facilitate an understanding and comparison of the relative importance of genetic and environmental risk factors that shape different clinically important disease outcomes. Increasingly, large-scale biobank studies that link genetic information to electronic health records, containing hundreds of disease diagnosis indicators that mostly occur infrequently within the sample, are becoming available. Here, we propose an extension of the existing liability-scale model theory suitable for estimating LSH in biobank studies of low-prevalence disease. In a simulation study, we find that our derived expression yields lower mean square error (MSE) and is less sensitive to prevalence misspecification as compared to previous transformations for diseases with ≤ 2% population prevalence and LSH of ≤ 0.45, especially if the biobank sample prevalence is less than that of the wider population. Applying our expression to 13 diagnostic outcomes of ≤ 3% prevalence in the UK Biobank study revealed important differences in LSH obtained from the different theoretical expressions that impact the conclusions made when comparing LSH across disease outcomes. This demonstrates the importance of careful consideration for estimation and prediction of low-prevalence disease outcomes and facilitates improved inference of the underlying genetic basis of ≤ 2% population prevalence diseases, especially where biobank sample ascertainment results in a healthier sample population.},
}

@article{12235,
  author       = {Patxot, Marion and Stojanov, Miloš and Ojavee, Sven Erik and Gobert, Rosanna Pescini and Kutalik, Zoltán and Gavillet, Mathilde and Baud, David and Robinson, Matthew Richard},
  title        = {{Haematological changes from conception to childbirth: An indicator of major pregnancy complications}},
  journal      = {European Journal of Haematology},
  volume       = {109},
  number       = {5},
  pages        = {566--575},
  year         = {2022},
  publisher    = {Wiley},
  issn         = {1600-0609},
  keywords     = {Hematology, General Medicine},
  doi          = {10.1111/ejh.13844},
  abstract     = {Background: About 800 women die every day worldwide from pregnancy-related complications, including excessive blood loss, infections and high-blood pressure (World Health Organization, 2019). To improve screening for high-risk pregnancies, we set out to identify patterns of maternal hematological changes associated with future pregnancy complications.

Methods: Using mixed effects models, we established changes in 14 complete blood count (CBC) parameters for 1710 healthy pregnancies and compared them to measurements from 98 pregnancy-induced hypertension, 106 gestational diabetes and 339 postpartum hemorrhage cases.

Results: Results show interindividual variations, but good individual repeatability in CBC values during physiological pregnancies, allowing the identification of specific alterations in women with obstetric complications. For example, in women with uncomplicated pregnancies, haemoglobin count decreases of 0.12 g/L (95% CI −0.16, −0.09) significantly per gestation week (p value <.001). Interestingly, this decrease is three times more pronounced in women who will develop pregnancy-induced hypertension, with an additional decrease of 0.39 g/L (95% CI −0.51, −0.26). We also confirm that obstetric complications and white CBC predict the likelihood of giving birth earlier during pregnancy.

Conclusion: We provide a comprehensive description of the associations between haematological changes through pregnancy and three major obstetric complications to support strategies for prevention, early-diagnosis and maternal care.},
}

@misc{13064,
  author       = {Orliac, Etienne and Trejo Banos, Daniel and Ojavee, Sven and Läll, Kristi and Mägi, Reedik and Visscher, Peter and Robinson, Matthew Richard},
  title        = {{Improving genome-wide association discovery and genomic prediction accuracy in biobank data}},
  year         = {2022},
  publisher    = {Dryad},
  doi          = {10.5061/DRYAD.GTHT76HMZ},
  abstract     = {Genetically informed, deep-phenotyped biobanks are an important research resource and it is imperative that the most powerful, versatile, and efficient analysis approaches are used. Here, we apply our recently developed Bayesian grouped mixture of regressions model (GMRM) in the UK and Estonian Biobanks and obtain the highest genomic prediction accuracy reported to date across 21 heritable traits. When compared to other approaches, GMRM accuracy was greater than annotation prediction models run in the LDAK or LDPred-funct software by 15% (SE 7%) and 14% (SE 2%), respectively, and was 18% (SE 3%) greater than a baseline BayesR model without single-nucleotide polymorphism (SNP) markers grouped into minor allele frequency–linkage disequilibrium (MAF-LD) annotation categories. For height, the prediction accuracy R2 was 47% in a UK Biobank holdout sample, which was 76% of the estimated h2SNP. We then extend our GMRM prediction model to provide mixed-linear model association (MLMA) SNP marker estimates for genome-wide association (GWAS) discovery, which increased the independent loci detected to 16,162 in unrelated UK Biobank individuals, compared to 10,550 from BoltLMM and 10,095 from Regenie, a 62 and 65% increase, respectively. The average χ2 value of the leading markers increased by 15.24 (SE 0.41) for every 1% increase in prediction accuracy gained over a baseline BayesR model across the traits. Thus, we show that modeling genetic associations accounting for MAF and LD differences among SNP markers, and incorporating prior knowledge of genomic function, is important for both genomic prediction and discovery in large-scale individual-level studies.},
}

% Journal article in Genome Biology, volume 23, issue 1 (2022).
% Author names use the "Last, First" form. The compound surnames
% "Trejo Banos" and "Muñoz Maniega" are kept whole before the comma so that
% BibTeX does not treat their first half as a given name; "Trejo Banos, Daniel"
% matches the spelling used for this author in the other entries of this file.
@article{10702,
  abstract     = {Background: Blood-based markers of cognitive functioning might provide an accessible way to track neurodegeneration years prior to clinical manifestation of cognitive impairment and dementia. Results: Using blood-based epigenome-wide analyses of general cognitive function, we show that individual differences in DNA methylation (DNAm) explain 35.0% of the variance in general cognitive function (g). A DNAm predictor explains ~4% of the variance, independently of a polygenic score, in two external cohorts. It also associates with circulating levels of neurology- and inflammation-related proteins, global brain imaging metrics, and regional cortical volumes. Conclusions: As sample sizes increase, the ability to assess cognitive function from DNAm data may be informative in settings where cognitive testing is unreliable or unavailable.},
  author       = {McCartney, Daniel L. and Hillary, Robert F. and Conole, Eleanor L. S. and Trejo Banos, Daniel and Gadd, Danni A. and Walker, Rosie M. and Nangle, Cliff and Flaig, Robin and Campbell, Archie and Murray, Alison D. and Muñoz Maniega, Susana and Valdés-Hernández, María Del C. and Harris, Mathew A. and Bastin, Mark E. and Wardlaw, Joanna M. and Harris, Sarah E. and Porteous, David J. and Tucker-Drob, Elliot M. and McIntosh, Andrew M. and Evans, Kathryn L. and Deary, Ian J. and Cox, Simon R. and Robinson, Matthew Richard and Marioni, Riccardo E.},
  issn         = {1474-760X},
  journal      = {Genome Biology},
  number       = {1},
  publisher    = {Springer Nature},
  title        = {{Blood-based epigenome-wide analyses of cognitive abilities}},
  doi          = {10.1186/s13059-021-02596-5},
  volume       = {23},
  year         = {2022},
}

% Journal article, volume 14, issue 1 (2022).
% The ampersands in the journal name are written as \& because this field is
% typeset by LaTeX and a bare ampersand is an alignment-tab character there.
% Hyphens in the title and in author names are plain ASCII "-" so the entry
% does not depend on Unicode hyphen (U+2010) support in the LaTeX engine.
@article{17076,
  abstract     = {Introduction: The levels of many blood proteins are associated with Alzheimer's disease (AD) or its pathological hallmarks. Elucidating the molecular factors that control circulating levels of these proteins may help to identify proteins associated with disease risk mechanisms.

Methods: Genome-wide and epigenome-wide studies (nindividuals ≤1064) were performed on plasma levels of 282 AD-associated proteins, identified by a structured literature review. Bayesian penalized regression estimated contributions of genetic and epigenetic variation toward inter-individual differences in plasma protein levels. Mendelian randomization (MR) and co-localization tested associations between proteins and disease-related phenotypes.

Results: Sixty-four independent genetic and 26 epigenetic loci were associated with 45 proteins. Novel findings included an association between plasma triggering receptor expressed on myeloid cells 2 (TREM2) levels and a polymorphism and cytosine-phosphate-guanine (CpG) site within the MS4A4A locus. Higher plasma tubulin-specific chaperone A (TBCA) and TREM2 levels were significantly associated with lower AD risk.

Discussion: Our data inform the regulation of biomarker levels and their relationships with AD.},
  author       = {Hillary, Robert F. and Gadd, Danni A. and McCartney, Daniel L. and Shi, Liu and Campbell, Archie and Walker, Rosie M. and Ritchie, Craig W. and Deary, Ian J. and Evans, Kathryn L. and Nevado-Holgado, Alejo J. and Hayward, Caroline and Porteous, David J. and McIntosh, Andrew M. and Lovestone, Simon and Robinson, Matthew Richard and Marioni, Riccardo E.},
  issn         = {2352-8729},
  journal      = {Alzheimer's \& Dementia: Diagnosis, Assessment \& Disease Monitoring},
  number       = {1},
  publisher    = {Wiley},
  title        = {{Genome- and epigenome-wide studies of plasma protein biomarkers for Alzheimer's disease implicate TBCA and TREM2 in disease risk}},
  doi          = {10.1002/dad2.12280},
  volume       = {14},
  year         = {2022},
}

% Data record published via Dryad (2021), identified by its DOI.
% The abstract is LaTeX-encoded: comparison signs are in math mode and every
% percent sign is escaped, so the text is safe if a style typesets it.
@misc{13063,
  abstract     = {We develop a Bayesian model (BayesRR-RC) that provides robust SNP-heritability estimation, an alternative to marker discovery, and accurate genomic prediction, taking 22 seconds per iteration to estimate 8.4 million SNP-effects and 78 SNP-heritability parameters in the UK Biobank. We find that only $\leq$ 10\% of the genetic variation captured for height, body mass index, cardiovascular disease, and type 2 diabetes is attributable to proximal regulatory regions within 10kb upstream of genes, while 12-25\% is attributed to coding regions, 32-44\% to introns, and 22-28\% to distal 10-500kb upstream regions. Up to 24\% of all cis and coding regions of each chromosome are associated with each trait, with over 3,100 independent exonic and intronic regions and over 5,400 independent regulatory regions having $>$95\% probability of contributing $>$0.001\% to the genetic variance of these four traits. Our open-source software (GMRM) provides a scalable alternative to current approaches for biobank data.},
  author       = {Robinson, Matthew Richard},
  publisher    = {Dryad},
  title        = {{Probabilistic inference of the genetic architecture of functional enrichment of complex traits}},
  doi          = {10.5061/dryad.sqv9s4n51},
  year         = {2021},
}

% Data record published via Zenodo (2021), identified by its DOI.
% Author names use the "Last, First" form. "del C" is part of the given name
% of Valdes-Hernandez and therefore sits after the comma; placed before the
% comma, BibTeX would read "del" as a von-particle and "C" as part of the
% surname.
@misc{13072,
  abstract     = {CpGs and corresponding mean weights for DNAm-based prediction of cognitive abilities (6 traits)},
  author       = {McCartney, Daniel L and Hillary, Robert F and Conole, Eleanor LS and Trejo Banos, Daniel and Gadd, Danni A and Walker, Rosie M and Nangle, Cliff and Flaig, Robin and Campbell, Archie and Murray, Alison D and Munoz Maniega, Susana and Valdes-Hernandez, Maria del C and Harris, Mathew A and Bastin, Mark E and Wardlaw, Joanna M and Harris, Sarah E and Porteous, David J and Tucker-Drob, Elliot M and McIntosh, Andrew M and Evans, Kathryn L and Deary, Ian J and Cox, Simon R and Robinson, Matthew Richard and Marioni, Riccardo E},
  publisher    = {Zenodo},
  title        = {{Blood-based epigenome-wide analyses of cognitive abilities}},
  doi          = {10.5281/ZENODO.5794028},
  year         = {2021},
}

% Journal article in Scientific Reports, volume 11 (2021).
% Fields are listed bibliographic data first, identifiers next, abstract last.
% NOTE(review): the abstract contains the literal placeholder
% "[Formula: see text]", which looks like an export artefact standing in for
% an inline formula; confirm the intended value against the published text.
@article{10069,
  author    = {Robinson, Matthew Richard and Patxot, Marion and Stojanov, Miloš and Blum, Sabine and Baud, David},
  title     = {{Postpartum hemorrhage risk is driven by changes in blood composition through pregnancy}},
  journal   = {Scientific Reports},
  volume    = {11},
  year      = {2021},
  publisher = {Springer Nature},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-021-98411-z},
  abstract  = {The extent to which women differ in the course of blood cell counts throughout pregnancy, and the importance of these changes to pregnancy outcomes has not been well defined. Here, we develop a series of statistical analyses of repeated measures data to reveal the degree to which women differ in the course of pregnancy, predict the changes that occur, and determine the importance of these changes for post-partum hemorrhage (PPH) which is one of the leading causes of maternal mortality. We present a prospective cohort of 4082 births recorded at the University Hospital, Lausanne, Switzerland between 2009 and 2014 where full labour records could be obtained, along with complete blood count data taken at hospital admission. We find significant differences, at a [Formula: see text] level, among women in how blood count values change through pregnancy for mean corpuscular hemoglobin, mean corpuscular volume, mean platelet volume, platelet count and red cell distribution width. We find evidence that almost all complete blood count values show trimester-specific associations with PPH. For example, high platelet count (OR 1.20, 95% CI 1.01-1.53), high mean platelet volume (OR 1.58, 95% CI 1.04-2.08), and high erythrocyte levels (OR 1.36, 95% CI 1.01-1.57) in trimester 1 increased PPH, but high values in trimester 3 decreased PPH risk (OR 0.85, 0.79, 0.67 respectively). We show that differences among women in the course of blood cell counts throughout pregnancy have an important role in shaping pregnancy outcome and tracking blood count value changes through pregnancy improves identification of women at increased risk of postpartum hemorrhage. This study provides greater understanding of the complex changes in blood count values that occur through pregnancy and provides indicators to guide the stratification of patients into risk groups.},
}

% Journal article in Nature Communications, volume 12, issue 1 (2021).
% Author names use the "Last, First" form; "Mägi, Reedik" is written with its
% diacritic, matching the spelling of this author elsewhere in this file.
@article{8429,
  abstract     = {We develop a Bayesian model (BayesRR-RC) that provides robust SNP-heritability estimation, an alternative to marker discovery, and accurate genomic prediction, taking 22 seconds per iteration to estimate 8.4 million SNP-effects and 78 SNP-heritability parameters in the UK Biobank. We find that only ≤10% of the genetic variation captured for height, body mass index, cardiovascular disease, and type 2 diabetes is attributable to proximal regulatory regions within 10kb upstream of genes, while 12-25% is attributed to coding regions, 32–44% to introns, and 22-28% to distal 10-500kb upstream regions. Up to 24% of all cis and coding regions of each chromosome are associated with each trait, with over 3,100 independent exonic and intronic regions and over 5,400 independent regulatory regions having ≥95% probability of contributing ≥0.001% to the genetic variance of these four traits. Our open-source software (GMRM) provides a scalable alternative to current approaches for biobank data.},
  author       = {Patxot, Marion and Trejo Banos, Daniel and Kousathanas, Athanasios and Orliac, Etienne J and Ojavee, Sven E and Moser, Gerhard and Sidorenko, Julia and Kutalik, Zoltan and Mägi, Reedik and Visscher, Peter M and Ronnegard, Lars and Robinson, Matthew Richard},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  number       = {1},
  publisher    = {Springer Nature},
  title        = {{Probabilistic inference of the genetic architecture underlying functional enrichment of complex traits}},
  doi          = {10.1038/s41467-021-27258-9},
  volume       = {12},
  year         = {2021},
}