% Ali et al. (2023): survey and benchmark of thirteen vectorization methods for persistent homology barcodes.
% Publisher is spelled out in full, matching the other TPAMI entries in this file that use the long form.
@article{14739,
  abstract     = {Attempts to incorporate topological information in supervised learning tasks have resulted in the creation of several techniques for vectorizing persistent homology barcodes. In this paper, we study thirteen such methods. Besides describing an organizational framework for these methods, we comprehensively benchmark them against three well-known classification tasks. Surprisingly, we discover that the best-performing method is a simple vectorization, which consists only of a few elementary summary statistics. Finally, we provide a convenient web application which has been designed to facilitate exploration and experimentation with various vectorization methods.},
  author       = {Ali, Dashti and Asaad, Aras and Jimenez, Maria-Jose and Nanda, Vidit and Paluzo-Hidalgo, Eduardo and Soriano Trigueros, Manuel},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  keywords     = {Applied Mathematics, Artificial Intelligence, Computational Theory and Mathematics, Computer Vision and Pattern Recognition, Software},
  number       = {12},
  pages        = {14069--14080},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{A survey of vectorization methods in topological data analysis}},
  doi          = {10.1109/tpami.2023.3308391},
  volume       = {45},
  year         = {2023},
}

% Sommer and Bronstein: horizontal frame bundle flows and bridge sampling for manifold convolutions.
% NOTE(review): year follows the print issue (vol. 44; vol. 42 is 2020 and vol. 45 is 2023 elsewhere in this file).
% The 2020 segment of the DOI presumably reflects early-access publication; confirm against the publisher record.
@article{18228,
  abstract     = {We introduce two constructions in geometric deep learning for 1) transporting orientation-dependent convolutional filters over a manifold in a continuous way and thereby defining a convolution operator that naturally incorporates the rotational effect of holonomy; and 2) allowing efficient evaluation of manifold convolution layers by sampling manifold valued random variables that center around a weighted diffusion mean. Both methods are inspired by stochastics on manifolds and geometric statistics, and provide examples of how stochastic methods – here horizontal frame bundle flows and non-linear bridge sampling schemes, can be used in geometric deep learning. We outline the theoretical foundation of the two methods, discuss their relation to Euclidean deep networks and existing methodology in geometric deep learning, and establish important properties of the proposed constructions.},
  author       = {Sommer, Stefan and Bronstein, Alexander},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number       = {2},
  pages        = {811--822},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Horizontal flows and manifold stochastics in geometric deep learning}},
  doi          = {10.1109/tpami.2020.2994507},
  volume       = {44},
  year         = {2022},
}

% Zabatani et al. (2020): technology, hardware, algorithms and calibration of the SR300 coded-light depth camera.
% Michael Bronstein's middle initial is "M.", as in the other entries of this file that list him.
% NOTE(review): "Moshe, Sagi Ben" parses with surname "Moshe"; if the family name is "Ben Moshe" it should read "Ben Moshe, Sagi" -- confirm.
@article{18245,
  abstract     = {Intel® RealSense™ SR300 is a depth camera capable of providing a VGA-size depth map at 60 fps and 0.125mm depth resolution. In addition, it outputs an infrared VGA-resolution image and a 1080p color texture image at 30 fps. SR300 form-factor enables it to be integrated into small consumer products and as a front facing camera in laptops and Ultrabooks™. The SR300 depth camera is based on a coded-light technology where triangulation between projected patterns and images captured by a dedicated sensor is used to produce the depth map. Each projected line is coded by a special temporal optical code, that enables a dense depth map reconstruction from its reflection. The solid mechanical assembly of the camera allows it to stay calibrated throughout temperature and pressure changes, drops, and hits. In addition, active dynamic control maintains a calibrated depth output. An extended API LibRS released with the camera allows developers to integrate the camera in various applications. Algorithms for 3D scanning, facial analysis, hand gesture recognition, and tracking are within reach for applications using the SR300. In this paper, we describe the underlying technology, hardware, and algorithms of the SR300, as well as its calibration procedure, and outline some use cases. We believe that this paper will provide a full case study of a mass-produced depth sensing product and technology.},
  author       = {Zabatani, Aviad and Surazhsky, Vitaly and Sperling, Erez and Moshe, Sagi Ben and Menashe, Ohad and Silver, David H. and Karni, Zachi and Bronstein, Alexander and Bronstein, Michael M. and Kimmel, Ron},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number       = {10},
  pages        = {2333--2345},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Intel® RealSense™ SR300 coded light depth camera}},
  doi          = {10.1109/tpami.2019.2915841},
  volume       = {42},
  year         = {2020},
}

% Xian et al. (2019): unified zero-shot learning benchmark, the AWA2 dataset, and a comparison of existing methods.
% Page range uses a bare double hyphen with no surrounding spaces, like every other entry in this file.
@article{6554,
  abstract     = {Due to the importance of zero-shot learning, i.e. classifying images where there is a lack of labeled training data, the number of proposed approaches has recently increased steadily. We argue that it is time to take a step back and to analyze the status quo of the area. The purpose of this paper is three-fold. First, given the fact that there is no agreed upon zero-shot learning benchmark, we first define a new benchmark by unifying both the evaluation protocols and data splits of publicly available datasets used for this task. This is an important contribution as published results are often not comparable and sometimes even flawed due to, e.g. pre-training on zero-shot test classes. Moreover, we propose a new zero-shot learning dataset, the Animals with Attributes 2 (AWA2) dataset which we make publicly available both in terms of image features and the images themselves. Second, we compare and analyze a significant number of the state-of-the-art methods in depth, both in the classic zero-shot setting but also in the more realistic generalized zero-shot setting. Finally, we discuss in detail the limitations of the current status of the area which can be taken as a basis for advancing it.},
  author       = {Xian, Yongqin and Lampert, Christoph and Schiele, Bernt and Akata, Zeynep},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number       = {9},
  pages        = {2251--2265},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Zero-shot learning - A comprehensive evaluation of the good, the bad and the ugly}},
  doi          = {10.1109/tpami.2018.2857768},
  volume       = {41},
  year         = {2019},
}

% Sprechmann, Bronstein and Sapiro (2015): learned fixed-complexity pursuit processes for sparse and low-rank models.
% NOTE(review): given names expanded from the initials "P." and "G." so styles can abbreviate as needed -- confirm against the publisher record.
% Publisher is spelled out in full, matching the other TPAMI entries in this file that use the long form.
@article{18415,
  abstract     = {Parsimony, including sparsity and low rank, has been shown to successfully model data in numerous machine learning and signal processing tasks. Traditionally, such modeling approaches rely on an iterative algorithm that minimizes an objective function with parsimony-promoting terms. The inherently sequential structure and data-dependent complexity and latency of iterative optimization constitute a major limitation in many applications requiring real-time performance or involving large-scale data. Another limitation encountered by these modeling techniques is the difficulty of their inclusion in discriminative learning scenarios. In this work, we propose to move the emphasis from the model to the pursuit algorithm, and develop a process-centric view of parsimonious modeling, in which a learned deterministic fixed-complexity pursuit process is used in lieu of iterative optimization. We show a principled way to construct learnable pursuit process architectures for structured sparse and robust low rank models, derived from the iteration of proximal descent algorithms. These architectures learn to approximate the exact parsimonious representation at a fraction of the complexity of the standard optimization methods. We also show that appropriate training regimes allow to naturally extend parsimonious models to discriminative settings. State-of-the-art results are demonstrated on several challenging problems in image and audio processing with several orders of magnitude speed-up compared to the exact optimization algorithms.},
  author       = {Sprechmann, Pablo and Bronstein, Alexander and Sapiro, Guillermo},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number       = {9},
  pages        = {1821--1833},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Learning efficient sparse and low rank models}},
  doi          = {10.1109/tpami.2015.2392779},
  volume       = {37},
  year         = {2015},
}

% Eynard et al. (2015): multimodal spectral geometry via simultaneous diagonalization of Laplacian matrices.
% Publisher is spelled out in full, matching the other TPAMI entries in this file that use the long form.
@article{18416,
  abstract     = {We construct an extension of spectral and diffusion geometry to multiple modalities through simultaneous diagonalization of Laplacian matrices. This naturally extends classical data analysis tools based on spectral geometry, such as diffusion maps and spectral clustering. We provide several synthetic and real examples of manifold learning, object classification, and clustering, showing that the joint spectral geometry better captures the inherent structure of multi-modal data. We also show the relation of many previous approaches for multimodal manifold analysis to our framework.},
  author       = {Eynard, Davide and Kovnatsky, Artiom and Bronstein, Michael M. and Glashoff, Klaus and Bronstein, Alexander},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number       = {12},
  pages        = {2505--2517},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Multimodal manifold analysis by simultaneous diagonalization of Laplacians}},
  doi          = {10.1109/tpami.2015.2408348},
  volume       = {37},
  year         = {2015},
}

% Litman and Bronstein (2014): learned parametric spectral descriptors for deformable shape correspondence.
% NOTE(review): given name expanded from the initial "R." so styles can abbreviate as needed -- confirm against the publisher record.
% Publisher is spelled out in full, matching the other TPAMI entries in this file that use the long form.
@article{18413,
  abstract     = {Informative and discriminative feature descriptors play a fundamental role in deformable shape analysis. For example, they have been successfully employed in correspondence, registration, and retrieval tasks. In recent years, significant attention has been devoted to descriptors obtained from the spectral decomposition of the Laplace-Beltrami operator associated with the shape. Notable examples in this family are the heat kernel signature (HKS) and the recently introduced wave kernel signature (WKS). The Laplacian-based descriptors achieve state-of-the-art performance in numerous shape analysis tasks; they are computationally efficient, isometry-invariant by construction, and can gracefully cope with a variety of transformations. In this paper, we formulate a generic family of parametric spectral descriptors. We argue that to be optimized for a specific task, the descriptor should take into account the statistics of the corpus of shapes to which it is applied (the "signal") and those of the class of transformations to which it is made insensitive (the "noise"). While such statistics are hard to model axiomatically, they can be learned from examples. Following the spirit of the Wiener filter in signal processing, we show a learning scheme for the construction of optimized spectral descriptors and relate it to Mahalanobis metric learning. The superiority of the proposed approach in generating correspondences is demonstrated on synthetic and scanned human figures. We also show that the learned descriptors are robust enough to be learned on synthetic data and transferred successfully to scanned shapes.},
  author       = {Litman, Roee and Bronstein, Alexander},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number       = {1},
  pages        = {171--180},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Learning spectral descriptors for deformable shape correspondence}},
  doi          = {10.1109/tpami.2013.148},
  volume       = {36},
  year         = {2014},
}

% Masci et al. (2014): coupled siamese network for hashing multiple modalities into one comparable space.
% The umlaut in "Jürgen" is written as raw UTF-8, like the other non-ASCII characters in this file.
% Publisher is spelled out in full, matching the other TPAMI entries in this file that use the long form.
@article{18414,
  abstract     = {We introduce an efficient computational framework for hashing data belonging to multiple modalities into a single representation space where they become mutually comparable. The proposed approach is based on a novel coupled siamese neural network architecture and allows unified treatment of intra- and inter-modality similarity learning. Unlike existing cross-modality similarity learning approaches, our hashing functions are not limited to binarized linear projections and can assume arbitrarily complex forms. We show experimentally that our method significantly outperforms state-of-the-art hashing approaches on multimedia retrieval tasks.},
  author       = {Masci, Jonathan and Bronstein, Michael M. and Bronstein, Alexander and Schmidhuber, Jürgen},
  issn         = {1939-3539},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number       = {4},
  pages        = {824--830},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Multimodal similarity-preserving hashing}},
  doi          = {10.1109/tpami.2013.225},
  volume       = {36},
  year         = {2014},
}