@phdthesis{18979,
  abstract     = {Topological Data Analysis (TDA) is a discipline utilizing the mathematical field of topology to study data, most prominently collections of point sets. This thesis summarizes three projects related to computations in TDA.

The first one establishes a variant of TDA for chromatic point sets, where each point is given a color. For example, we are given positions of cells within a tumor microenvironment, and color the cancerous cells red, and the immune cells blue.

The aim is then to give a quantitative description of how the two or more sets of points spatially interact. Building on image, kernel and cokernel variants of persistent homology, we suggest six-packs of persistent diagrams as such a descriptor.

We describe a construction of a chromatic alpha complex, which enables  efficient computation of several variants of the six-packs. We give topological descriptions of natural subcomplexes of the chromatic alpha complex, and show that the radii of the simplices form a discrete Morse function. Finally, we provide an implementation of the presented chromatic TDA pipeline.

The second part aims to translate a powerful tool of sheaf theory to elementary terms using labeled matrices. The goal is to enable their use in computational settings. We show that derived categories of sheaves over finite posets have, up to isomorphism, unique objects---minimal injective resolutions---and give a concrete algorithm to compute them. We further describe simple algorithms to compute derived pushforwards and pullbacks for monotonic maps, and their proper variants for inclusions, and demonstrate their tractability by providing an implementation. Finally, we suggest a discrete definition of microsupport and show desirable properties inspired by discrete Morse theory.

In the last part, we present a collection of observations about collapses. We give a characterization of collapsibility in terms of unitriangular submatrices of the boundary matrix, a cotree-tree decomposition, and the optimal solution to a variant of the Procrustes problem. We establish relation between dual collapses and relative Morse theory and pose several open questions. Finally, focusing on complexes embedded in the three-dimensional Euclidean space, we describe a relation between the collapsibility and the triviality of a polygonal knot.},
  author       = {Draganov, Ondrej},
  issn         = {2663-337X},
  keywords     = {topological data analysis, chromatic point set, alpha complex, persistent homology, six pack, sheaf, microlocal discrete Morse, injective resolution, collapse, knot, discrete Morse theory},
  pages        = {140},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Structures and computations in topological data analysis}},
  doi          = {10.15479/at:ista:18979},
  year         = {2025},
}

@phdthesis{18667,
  abstract     = {Many chemical and physical properties of materials are determined by the material’s shape,
for example the size of its pores and the width of its tunnels. This makes materials science
a prime application area for geometrical and topological methods. Nevertheless many
methods in topological data analysis have not been satisfyingly extended to the needs of
materials science. This thesis provides new methods and new mathematical theorems
targeted at those specific needs by answering four different research questions. While the
motivation for each of the research questions arises from materials science, the methods
are versatile and can be applied in different areas as well. 

The first research question is concerned with image data, for example a three-dimensional
computed tomography (CT) scan of a material, like sand or stone. There are two commonly
used topologies for digital images and depending on the application either of them might be
required. However, software for computing the topological data analysis method persistence
homology, usually supports only one of the two topologies. We answer the question how to
compute persistent homology of an image with respect to one of the two topologies using
software that is intended for the other topology. 

The second research question is concerned with image data as well, and asks how much
of the topological information of an image is lost when the resolution is coarsened. As
computer tomography scanners are more expensive the higher the resolution, it is an
important question in materials science to know which resolution is enough to get satisfying
persistent homology. We give theoretical bounds on the information loss based on different
geometrical properties of the object to be scanned. In addition, we conduct experiments on
sand and stone CT image data. 

The third research question is motivated by comparing crystalline materials efficiently. As
the atoms within a crystal repeat periodically, crystalline materials are either modeled by
unmanageable infinite periodic point sets, or by one of their fundamental domains, which is
unstable under perturbation. Therefore a fingerprint of crystalline materials is needed, with
appropriate properties such that comparing the crystals can be eased by comparing the
fingerprints instead. We define the density fingerprint and prove the necessary properties. 

The fourth research question is motivated by studying the hole-structure or connectedness,
i.e. persistent homology or merge trees, of crystalline materials. A common way to deal
with periodicity is to take a fundamental domain and identify opposite boundaries to form a
torus. However, computing persistent homology or merge trees on that torus loses some
of the information materials scientists are interested in and is additionally not stable under
certain noise. We therefore decorate the merge tree stemming from the torus with additional
information describing the density and growth rate of the periodic copies of a component
within a growing spherical window. We prove all desired properties, like stability and efficient
computability.},
  author       = {Heiss, Teresa},
  isbn         = {978-3-99078-052-7},
  issn         = {2663-337X},
  keywords     = {persistent homology, topological data analysis, periodic, crystalline materials, images, fingerprint},
  pages        = {111},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{New methods for applying topological data analysis to materials science}},
  doi          = {10.15479/at:ista:18667},
  year         = {2024},
}

@article{11608,
  abstract     = {In order to understand stellar evolution, it is crucial to efficiently determine stellar surface rotation periods. Indeed, while they are of great importance in stellar models, angular momentum transport processes inside stars are still poorly understood today. Surface rotation, which is linked to the age of the star, is one of the constraints needed to improve the way those processes are modelled. Statistics of the surface rotation periods for a large sample of stars of different spectral types are thus necessary. An efficient tool to automatically determine reliable rotation periods is needed when dealing with large samples of stellar photometric datasets. The objective of this work is to develop such a tool. For this purpose, machine learning classifiers constitute relevant bases to build our new methodology. Random forest learning abilities are exploited to automate the extraction of rotation periods in Kepler light curves. Rotation periods and complementary parameters are obtained via three different methods: a wavelet analysis, the autocorrelation function of the light curve, and the composite spectrum. We trained three different classifiers: one to detect if rotational modulations are present in the light curve, one to flag close binary or classical pulsators candidates that can bias our rotation period determination, and finally one classifier to provide the final rotation period. We tested our machine learning pipeline on 23 431 stars of the Kepler K and M dwarf reference rotation catalogue for which 60\% of the stars have been visually inspected. For the sample of 21 707 stars where all the input parameters are provided to the algorithm, 94.2\% of them are correctly classified (as rotating or not). Among the stars that have a rotation period in the reference catalogue, the machine learning provides a period that agrees within 10\% of the reference value for 95.3\% of the stars. 
Moreover, the yield of correct rotation periods is raised to 99.5\% after visually inspecting 25.2\% of the stars. Over the two main analysis steps, rotation classification and period selection, the pipeline yields a global agreement with the reference values of 92.1\% and 96.9\% before and after visual inspection. Random forest classifiers are efficient tools to determine reliable rotation periods in large samples of stars. The methodology presented here could be easily adapted to extract surface rotation periods for stars with different spectral types or observed by other instruments such as K2, TESS or by PLATO in the near future.},
  author       = {Breton, S. N. and Santos, A. R. G. and Bugnet, Lisa Annabelle and Mathur, S. and García, R. A. and Pallé, P. L.},
  issn         = {1432-0746},
  journal      = {Astronomy \& Astrophysics},
  keywords     = {Space and Planetary Science, Astronomy and Astrophysics, methods: data analysis, stars: solar-type, stars: activity, stars: rotation, starspots},
  publisher    = {EDP Sciences},
  title        = {{ROOSTER: A machine-learning analysis tool for Kepler stellar rotation periods}},
  doi          = {10.1051/0004-6361/202039947},
  volume       = {647},
  year         = {2021},
}

@article{11615,
  abstract     = {The recently published Kepler mission Data Release 25 (DR25) reported on ∼197 000 targets observed during the mission. Despite this, no wide search for red giants showing solar-like oscillations have been made across all stars observed in Kepler’s long-cadence mode. In this work, we perform this task using custom apertures on the Kepler pixel files and detect oscillations in 21 914 stars, representing the largest sample of solar-like oscillating stars to date. We measure their frequency at maximum power, νmax, down to νmax≃4μHz and obtain log (g) estimates with a typical uncertainty below 0.05 dex, which is superior to typical measurements from spectroscopy. Additionally, the νmax distribution of our detections show good agreement with results from a simulated model of the Milky Way, with a ratio of observed to predicted stars of 0.992 for stars with 10<νmax<270μHz. Among our red giant detections, we find 909 to be dwarf/subgiant stars whose flux signal is polluted by a neighbouring giant as a result of using larger photometric apertures than those used by the NASA Kepler science processing pipeline. We further find that only 293 of the polluting giants are known Kepler targets. The remainder comprises over 600 newly identified oscillating red giants, with many expected to belong to the Galactic halo, serendipitously falling within the Kepler pixel files of targeted stars.},
  author       = {Hon, Marc and Stello, Dennis and García, Rafael A. and Mathur, Savita and Sharma, Sanjib and Colman, Isabel L. and Bugnet, Lisa Annabelle},
  issn         = {1365-2966},
  journal      = {Monthly Notices of the Royal Astronomical Society},
  keywords     = {Space and Planetary Science, Astronomy and Astrophysics, asteroseismology, methods: data analysis, techniques: image processing, stars: oscillations, stars: statistics},
  number       = {4},
  pages        = {5616--5630},
  publisher    = {Oxford University Press},
  title        = {{A search for red giant solar-like oscillations in all Kepler data}},
  doi          = {10.1093/mnras/stz622},
  volume       = {485},
  year         = {2019},
}

@article{11623,
  abstract     = {Brightness variations due to dark spots on the stellar surface encode information about stellar surface rotation and magnetic activity. In this work, we analyze the Kepler long-cadence data of 26,521 main-sequence stars of spectral types M and K in order to measure their surface rotation and photometric activity level. Rotation-period estimates are obtained by the combination of a wavelet analysis and autocorrelation function of the light curves. Reliable rotation estimates are determined by comparing the results from the different rotation diagnostics and four data sets. We also measure the photometric activity proxy Sph using the amplitude of the flux variations on an appropriate timescale. We report rotation periods and photometric activity proxies for about 60\% of the sample, including 4431 targets for which McQuillan et al. did not report a rotation period. For the common targets with rotation estimates in this study and in McQuillan et al., our rotation periods agree within 99\%. In this work, we also identify potential polluters, such as misclassified red giants and classical pulsator candidates. Within the parameter range we study, there is a mild tendency for hotter stars to have shorter rotation periods. The photometric activity proxy spans a wider range of values with increasing effective temperature. The rotation period and photometric activity proxy are also related, with Sph being larger for fast rotators. Similar to McQuillan et al., we find a bimodal distribution of rotation periods.},
  author       = {Santos, A. R. G. and García, R. A. and Mathur, S. and Bugnet, Lisa Annabelle and van Saders, J. L. and Metcalfe, T. S. and Simonian, G. V. A. and Pinsonneault, M. H.},
  issn         = {0067-0049},
  journal      = {The Astrophysical Journal Supplement Series},
  keywords     = {Space and Planetary Science, Astronomy and Astrophysics, methods: data analysis, stars: activity, stars: low-mass, stars: rotation, starspots, techniques: photometric},
  number       = {1},
  publisher    = {IOP Publishing},
  title        = {{Surface rotation and photometric activity for Kepler targets. I. M and K main-sequence stars}},
  doi          = {10.3847/1538-4365/ab3b56},
  volume       = {244},
  year         = {2019},
}

% arXiv preprint: the eprint identifier is the suffix of the DOI; note is required for unpublished entries.
@unpublished{11630,
  abstract     = {The second mission of NASA’s Kepler satellite, K2, has collected hundreds of thousands of lightcurves for stars close to the ecliptic plane. This new sample could increase the number of known pulsating stars and then improve our understanding of those stars. For the moment only a few stars have been properly classified and published. In this work, we present a method to automatically classify K2 pulsating stars using a Machine Learning technique called Random Forest. The objective is to sort out the stars in four classes: red giant (RG), main-sequence Solar-like stars (SL), classical pulsators (PULS) and Other. To do this we use the effective temperatures and the luminosities of the stars as well as the FliPer features, that measures the amount of power contained in the power spectral density. The classifier now retrieves the right classification for more than 80\% of the stars.},
  author       = {Le Saux, A. and Bugnet, Lisa Annabelle and Mathur, S. and Breton, S. N. and García, R. A.},
  booktitle    = {arXiv},
  eprint       = {1906.09611},
  eprinttype   = {arXiv},
  keywords     = {asteroseismology, methods: data analysis, techniques: machine learning, stars: oscillations},
  note         = {Preprint},
  title        = {{Automatic classification of K2 pulsating stars using machine learning techniques}},
  doi          = {10.48550/arXiv.1906.09611},
  year         = {2019},
}

@phdthesis{6473,
  abstract     = {Single cells are constantly interacting with their environment and each other, more importantly, the accurate perception of environmental cues is crucial for growth, survival, and reproduction. This communication between cells and their environment can be formalized in mathematical terms and be quantified as the information flow between them, as prescribed by information theory. 
The recent availability of real–time dynamical patterns of signaling molecules in single cells has allowed us to identify encoding about the identity of the environment in the time–series. However, efficient estimation of the information transmitted by these signals has been a data–analysis challenge due to the high dimensionality of the trajectories and the limited number of samples. In the first part of this thesis, we develop and evaluate decoding–based estimation methods to lower bound the mutual information and derive model–based precise information estimates for biological reaction networks governed by the chemical master equation. This is followed by applying the decoding-based methods to study the intracellular representation of extracellular changes in budding yeast, by observing the transient dynamics of nuclear translocation of 10 transcription factors in response to 3 stress conditions. Additionally, we apply these estimators to previously published data on ERK and Ca2+ signaling and yeast stress response. We argue that this single cell decoding-based measure of information provides an unbiased, quantitative and interpretable measure for the fidelity of biological signaling processes. 
Finally, in the last section, we deal with gene regulation which is primarily controlled by transcription factors (TFs) that bind to the DNA to activate gene expression. The possibility that non-cognate TFs activate transcription diminishes the accuracy of regulation with potentially disastrous effects for the cell. This ’crosstalk’ acts as a previously unexplored source of noise in biochemical networks and puts a strong constraint on their performance. To mitigate erroneous initiation we propose an out of equilibrium scheme that implements kinetic proofreading. We show that such architectures are favored  over their equilibrium counterparts for complex organisms despite introducing noise in gene expression. },
  author       = {Cepeda Humerez, Sarah A.},
  issn         = {2663-337X},
  keywords     = {Information estimation, Time-series, data analysis},
  pages        = {135},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Estimating information flow in single cells}},
  doi          = {10.15479/AT:ISTA:6473},
  year         = {2019},
}

@article{11618,
  abstract     = {Asteroseismology provides global stellar parameters such as masses, radii, or surface gravities using mean global seismic parameters and effective temperature for thousands of low-mass stars (0.8 M⊙ < M < 3 M⊙). This methodology has been successfully applied to stars in which acoustic modes excited by turbulent convection are measured. Other methods such as the Flicker technique can also be used to determine stellar surface gravities, but only works for log g above 2.5 dex. In this work, we present a new metric called FliPer (Flicker in spectral power density, in opposition to the standard Flicker measurement which is computed in the time domain); it is able to extend the range for which reliable surface gravities can be obtained (0.1 < log g < 4.6 dex) without performing any seismic analysis for stars brighter than Kp < 14. FliPer takes into account the average variability of a star measured in the power density spectrum in a given range of frequencies. However, FliPer values calculated on several ranges of frequency are required to better characterize a star. Using a large set of asteroseismic targets it is possible to calibrate the behavior of surface gravity with FliPer through machine learning. This calibration made with a random forest regressor covers a wide range of surface gravities from main-sequence stars to subgiants and red giants, with very small uncertainties from 0.04 to 0.1 dex. FliPer values can be inserted in automatic global seismic pipelines to either give an estimation of the stellar surface gravity or to assess the quality of the seismic results by detecting any outliers in the obtained νmax values. FliPer also constrains the surface gravities of main-sequence dwarfs using only long-cadence data for which the Nyquist frequency is too low to measure the acoustic-mode properties.},
  author       = {Bugnet, Lisa Annabelle and García, R. A. and Davies, G. R. and Mathur, S. and Corsaro, E. and Hall, O. J. and Rendle, B. M.},
  issn         = {1432-0746},
  journal      = {Astronomy \& Astrophysics},
  keywords     = {Space and Planetary Science, Astronomy and Astrophysics, asteroseismology, methods: data analysis, stars: oscillations},
  publisher    = {EDP Sciences},
  title        = {{FliPer: A global measure of power density to estimate surface gravities of main-sequence solar-like stars and red giants}},
  doi          = {10.1051/0004-6361/201833106},
  volume       = {620},
  year         = {2018},
}

% arXiv preprint: the eprint identifier is the suffix of the DOI; note is required for unpublished entries.
@unpublished{11631,
  abstract     = {The recently launched NASA Transiting Exoplanet Survey Satellite (TESS) mission is going to collect lightcurves for a few hundred million of stars and we expect to increase the number of pulsating stars to analyze compared to the few thousand stars observed by the CoRoT, Kepler and K2 missions. However, most of the TESS targets have not yet been properly classified and characterized. In order to improve the analysis of the TESS data, it is crucial to determine the type of stellar pulsations in a timely manner. We propose an automatic method to classify stars attending to their pulsation properties, in particular, to identify solar-like pulsators among all TESS targets. It relies on the use of the global amount of power contained in the power spectrum (already known as the FliPer method) as a key parameter, along with the effective temperature, to feed into a machine learning classifier. Our study, based on TESS simulated datasets, shows that we are able to classify pulsators with a 98\% accuracy.},
  author       = {Bugnet, Lisa Annabelle and García, R. A. and Davies, G. R. and Mathur, S. and Hall, O. J. and Rendle, B. M.},
  booktitle    = {arXiv},
  eprint       = {1811.12140},
  eprinttype   = {arXiv},
  keywords     = {asteroseismology, methods: data analysis, stars: oscillations},
  note         = {Preprint},
  title        = {{FliPer: Classifying TESS pulsating stars}},
  doi          = {10.48550/arXiv.1811.12140},
  year         = {2018},
}

% arXiv preprint: the eprint identifier is the suffix of the DOI; note is required for unpublished entries.
@unpublished{11633,
  abstract     = {Our understanding of stars through asteroseismic data analysis is limited by our ability to take advantage of the huge amount of observed stars provided by space missions such as CoRoT, Kepler, K2, and soon TESS and PLATO. Global seismic pipelines provide global stellar parameters such as mass and radius using the mean seismic parameters, as well as the effective temperature. These pipelines are commonly used automatically on thousands of stars observed by K2 for 3 months (and soon TESS for at least ∼ 1 month). However, pipelines are not immune from misidentifying noise peaks and stellar oscillations. Therefore, new validation techniques are required to assess the quality of these results. We present a new metric called FliPer (Flicker in Power), which takes into account the average variability at all measured time scales. The proper calibration of FliPer enables us to obtain good estimations of global stellar parameters such as surface gravity that are robust against the influence of noise peaks and hence are an excellent way to find faults in asteroseismic pipelines.},
  author       = {Bugnet, Lisa Annabelle and García, R. A. and Davies, G. R. and Mathur, S. and Corsaro, E.},
  booktitle    = {arXiv},
  eprint       = {1711.02890},
  eprinttype   = {arXiv},
  keywords     = {asteroseismology, methods: data analysis, stars: oscillations},
  note         = {Preprint},
  title        = {{FliPer: Checking the reliability of global seismic parameters from automatic pipelines}},
  doi          = {10.48550/arXiv.1711.02890},
  year         = {2017},
}

@article{10396,
  abstract     = {Stimfit is a free cross-platform software package for viewing and analyzing electrophysiological data. It supports most standard file types for cellular neurophysiology and other biomedical formats. Its analysis algorithms have been used and validated in several experimental laboratories. Its embedded Python scripting interface makes Stimfit highly extensible and customizable.},
  author       = {Schlögl, Alois and Jonas, Peter M. and Schmidt-Hieber, C. and Guzman, S. J.},
  issn         = {1862-278X},
  journal      = {Biomedical Engineering / Biomedizinische Technik},
  keywords     = {biomedical engineering, data analysis, free software},
  location     = {Graz, Austria},
  number       = {SI-1-Track-G},
  publisher    = {De Gruyter},
  title        = {{Stimfit: A fast visualization and analysis environment for cellular neurophysiology}},
  doi          = {10.1515/bmt-2013-4181},
  volume       = {58},
  year         = {2013},
}