% PhD thesis, Institute of Science and Technology Austria, 2024.
% `school` carries the same value as `publisher`: standard BibTeX styles read
% only `school` for thesis entries, while `publisher` is kept for tools that use it.
@phdthesis{18642,
  abstract     = {This thesis consists of two pieces of work in the broader field of computational biology,
both of which are methods for the analysis of large scale biological data, implemented in
efficient software.
Chapter 2 introduces a statistical software for causal discovery and inference from observed
genetic marker and phenotypic trait data. We explore in simulation how well the method
can fine-map genetic effects, find the correct causal structure among tens of traits and
millions of genetic markers, and infer the causal effect size for the discovered causal
relations. We then apply the method to 8 million markers and 17 traits from the UK
Biobank and show that many relationships found with other methods are likely due to
the effects of hidden confounders.
Chapter 3 describes how this method can be applied to longitudinal data. I show how one
can incorporate the background knowledge present in the known order of measurements to
improve the accuracy of the causal discovery process, and explore the method’s ability to
identify age specific genetic effects, and how the error rates of this recovery are influenced
by missing data due to different censoring mechanisms.
Chapter 4 introduces a statistical software for the comparison of chromatin contact maps
based on the structural similarity index. We explore the robustness of the method to
noise and size differences of the compared maps, show how it can measure evolutionary
conservation of topological features by providing a similarity ranking of syntenic regions,
and finally how it can detect alterations in 3D genome structure due to genetic mutations
in samples of medical relevance.},
  author       = {Machnik, Nick N},
  issn         = {2663-337X},
  pages        = {138},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Algorithms for causal learning and comparative analysis for genomic data}},
  doi          = {10.15479/at:ista:18642},
  year         = {2024},
}

% Preprint. `note` is the field standard styles require and print for
% unpublished entries; `booktitle` is kept for tools that read the venue from it.
@unpublished{18648,
  abstract     = {Statistical causal learning in genomics relies on the instrumental variable method of
Mendelian Randomization (MR). Currently, an overwhelming number of MR studies
purport to show causal relationships among a wide range of risk factors and outcomes.
Here, we show that selecting instrument variables from genome-wide association study
estimates leads to high false discovery rates for many MR approaches, which can be
greatly reduced by employing a graphical inference approach which: (i) explicitly tests
instrumental variable assumptions; (ii) distinguishes direct from indirect factors in very
high-dimensional data; (iii) discriminates pleiotropic from trait-specific markers, controlling for LD genome-wide; (iv) accommodates rare variants and binary outcomes in a
principled way; and (v) identifies potential unobserved latent confounding. For 17 traits
and 8.4M variants recorded for 458,747 individuals in the UK Biobank, we show that
standard MR analysis gives an abundance of findings that disappear under stringent
assumption checks, with many relationships reflecting potential unmeasured confounding. This implies that mixtures of temporal precedence and potential for reverse-causality
prohibit understanding the underlying nature of phenotypic and genetic correlations in
biobank data. We propose that well-curated longitudinal records are likely needed and
that our approach provides a first-step toward robust principled screening for potential
causal links.},
  author       = {Machnik, Nick N and Mahmoudi, Seyed Mahdi and Borczyk, Malgorzata and Krätschmer, Ilse and Bauer, Markus J. and Robinson, Matthew Richard},
  booktitle    = {bioRxiv},
  note         = {bioRxiv preprint},
  title        = {{Causal inference for multiple risk factors and diseases from genomics data}},
  doi          = {10.1101/2023.12.06.570392},
  year         = {2024},
}

% Journal correspondence: Nature Genetics 55(12), 2053--2055 (2023).
% Fields ordered as: who, what, where, when, identifiers.
@article{14689,
  author       = {Ing-Simmons, Elizabeth and Machnik, Nick N and Vaquerizas, Juan M.},
  title        = {{Reply to: Revisiting the use of structural similarity index in Hi-C}},
  journal      = {Nature Genetics},
  volume       = {55},
  number       = {12},
  pages        = {2053--2055},
  year         = {2023},
  publisher    = {Springer Nature},
  issn         = {1546-1718},
  doi          = {10.1038/s41588-023-01595-5},
}

% Collected student reports, IST Austria, 2020.
% `institution` carries the same value as `publisher`: standard BibTeX styles
% require `institution` for technical reports and do not read `publisher` there.
@techreport{8151,
  abstract     = {The main idea behind the Core Project is to teach first year students at IST scientific communication skills and let them practice by presenting their research within an interdisciplinary environment. Over the course of the first semester, students participated in seminars, where they shared their results with the colleagues from other fields and took part in discussions on relevant subjects. The main focus during this sessions was on delivering the information in a simplified and comprehensible way, going into the very basics of a subject if necessary. At the end, the students were asked to present their research in the written form to exercise their writing skills. The reports were gathered in this document. All of them were reviewed by the teaching assistants and write-ups illustrating unique stylistic features and, in general, an outstanding level of writing skills, were honorably mentioned in the section "Selected Reports".},
  author       = {Maslov, Mikhail and Kondrashov, Fyodor and Artner, Christina and Hennessey-Wesen, Mike and Kavcic, Bor and Machnik, Nick N and Satapathy, Roshan K and Tomanek, Isabella},
  institution  = {IST Austria},
  pages        = {425},
  publisher    = {IST Austria},
  title        = {{Core Project Proceedings}},
  year         = {2020},
}

% Journal article introducing the CHESS algorithm: Nature Genetics 52, 1247--1255 (2020).
% The author list starts directly at the first surname (no leading whitespace), so
% tools that do not trim field values see the same name string as in other entries.
@article{8707,
  abstract     = {Dynamic changes in the three-dimensional (3D) organization of chromatin are associated with central biological processes, such as transcription, replication and development. Therefore, the comprehensive identification and quantification of these changes is fundamental to understanding of evolutionary and regulatory mechanisms. Here, we present Comparison of Hi-C Experiments using Structural Similarity (CHESS), an algorithm for the comparison of chromatin contact maps and automatic differential feature extraction. We demonstrate the robustness of CHESS to experimental variability and showcase its biological applications on (1) interspecies comparisons of syntenic regions in human and mouse models; (2) intraspecies identification of conformational changes in Zelda-depleted Drosophila embryos; (3) patient-specific aberrant chromatin conformation in a diffuse large B-cell lymphoma sample; and (4) the systematic identification of chromatin contact differences in high-resolution Capture-C data. In summary, CHESS is a computationally efficient method for the comparison and classification of changes in chromatin contact data.},
  author       = {Galan, Silvia and Machnik, Nick N and Kruse, Kai and Díaz, Noelia and Marti-Renom, Marc A and Vaquerizas, Juan M},
  issn         = {1546-1718},
  journal      = {Nature Genetics},
  pages        = {1247--1255},
  publisher    = {Springer Nature},
  title        = {{CHESS enables quantitative comparison of chromatin contact data and automatic feature extraction}},
  doi          = {10.1038/s41588-020-00712-y},
  volume       = {52},
  year         = {2020},
}

