@inproceedings{17898,
  abstract     = {There is an ever-growing zoo of modern neural network models that can efficiently learn end-to-end control from visual observations. These advanced deep models, ranging from convolutional to Vision Transformers, from small to gigantic networks, have been extensively tested on offline image classification tasks. In this paper, we study these vision models with respect to the open-loop training to closed-loop generalization abilities, i.e., deployment realizes a causal feedback loop that is not present during training. This causality gap typically emerges in robotics applications such as autonomous driving, where a network is trained to imitate the control commands of a human. In this setting, two situations arise: 1) Closed-loop testing in-distribution, where the test environment shares properties with those of offline training data. 2) Closed-loop testing under distribution shifts and out-of-distribution. Contrary to recently reported results, we show that under proper training guidelines, all vision architectures perform indistinguishably well on in-distribution deployment, resolving the causality gap. In situation 2, We observe that scale is the strongest factor in improving closed-loop generalization regardless of the choice of the model architecture. Our results predict the trend that in the future we will see larger and larger models being used in offline-training-online-deployment imitation learning tasks in robotic applications.},
  author       = {Lechner, Mathias and Hasani, Ramin and Amini, Alexander and Wang, Tsun Hsuan and Henzinger, Thomas A and Rus, Daniela},
  booktitle    = {Proceedings of the 2024 IEEE International Conference on Robotics and Automation},
  isbn         = {9798350384574},
  issn         = {1050-4729},
  location     = {Yokohama, Japan},
  pages        = {2774--2782},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Overparametrization helps offline-to-online generalization of closed-loop control from pixels}},
  doi          = {10.1109/ICRA57147.2024.10610284},
  year         = {2024},
}

@inproceedings{12976,
  abstract     = {3D printing based on continuous deposition of materials, such as filament-based 3D printing, has seen widespread adoption thanks to its versatility in working with a wide range of materials. An important shortcoming of this type of technology is its limited multi-material capabilities. While there are simple hardware designs that enable multi-material printing in principle, the required software is heavily underdeveloped. A typical hardware design fuses together individual materials fed into a single chamber from multiple inlets before they are deposited. This design, however, introduces a time delay between the intended material mixture and its actual deposition. In this work, inspired by diverse path planning research in robotics, we show that this mechanical challenge can be addressed via improved printer control. We propose to formulate the search for optimal multi-material printing policies in a reinforcement
learning setup. We put forward a simple numerical deposition model that takes into account the non-linear material mixing and delayed material deposition. To validate our system we focus on color fabrication, a problem known for its strict requirements for varying material mixtures at a high spatial frequency. We demonstrate that our learned control policy outperforms state-of-the-art hand-crafted algorithms.},
  author       = {Liao, Kang and Tricard, Thibault and Piovarci, Michael and Seidel, Hans-Peter and Babaei, Vahid},
  booktitle    = {2023 IEEE International Conference on Robotics and Automation},
  issn         = {1050-4729},
  keywords     = {reinforcement learning, deposition, control, color, multi-filament},
  location     = {London, United Kingdom},
  pages        = {12345--12352},
  publisher    = {IEEE},
  title        = {{Learning deposition policies for fused multi-material 3D printing}},
  doi          = {10.1109/ICRA48891.2023.10160465},
  volume       = {2023},
  year         = {2023},
}

@inproceedings{12010,
  abstract     = {World models learn behaviors in a latent imagination space to enhance the sample-efficiency of deep reinforcement learning (RL) algorithms. While learning world models for high-dimensional observations (e.g., pixel inputs) has become practicable on standard RL benchmarks and some games, their effectiveness in real-world robotics applications has not been explored. In this paper, we investigate how such agents generalize to real-world autonomous vehicle control tasks, where advanced model-free deep RL algorithms fail. In particular, we set up a series of time-lap tasks for an F1TENTH racing robot, equipped with a high-dimensional LiDAR sensor, on a set of test tracks with a gradual increase in their complexity. In this continuous-control setting, we show that model-based agents capable of learning in imagination substantially outperform model-free agents with respect to performance, sample efficiency, successful task completion, and generalization. Moreover, we show that the generalization ability of model-based agents strongly depends on the choice of their observation model. We provide extensive empirical evidence for the effectiveness of world models provided with long enough memory horizons in sim2real tasks.},
  author       = {Brunnbauer, Axel and Berducci, Luigi and Brandstatter, Andreas and Lechner, Mathias and Hasani, Ramin and Rus, Daniela and Grosu, Radu},
  booktitle    = {2022 International Conference on Robotics and Automation},
  isbn         = {9781728196817},
  issn         = {1050-4729},
  location     = {Philadelphia, PA, United States},
  pages        = {7513--7520},
  publisher    = {IEEE},
  title        = {{Latent imagination facilitates zero-shot transfer in autonomous racing}},
  doi          = {10.1109/ICRA46639.2022.9811650},
  year         = {2022},
}

@inproceedings{10666,
  abstract     = {Adversarial training is an effective method to train deep learning models that are resilient to norm-bounded perturbations, with the cost of nominal performance drop. While adversarial training appears to enhance the robustness and safety of a deep model deployed in open-world decision-critical applications, counterintuitively, it induces undesired behaviors in robot learning settings. In this paper, we show theoretically and experimentally that neural controllers obtained via adversarial training are subjected to three types of defects, namely transient, systematic, and conditional errors. We first generalize adversarial training to a safety-domain optimization scheme allowing for more generic specifications. We then prove that such a learning process tends to cause certain error profiles. We support our theoretical results by a thorough experimental safety analysis in a robot-learning task. Our results suggest that adversarial training is not yet ready for robot learning.},
  author       = {Lechner, Mathias and Hasani, Ramin and Grosu, Radu and Rus, Daniela and Henzinger, Thomas A},
  booktitle    = {2021 IEEE International Conference on Robotics and Automation},
  isbn         = {978-1-7281-9078-5},
  issn         = {2577-087X},
  location     = {Xi'an, China},
  pages        = {4140--4147},
  title        = {{Adversarial training is not ready for robot learning}},
  doi          = {10.1109/ICRA48506.2021.9561036},
  year         = {2021},
}

@inproceedings{8704,
  abstract     = {Traditional robotic control suits require profound task-specific knowledge for designing, building and testing control software. The rise of Deep Learning has enabled end-to-end solutions to be learned entirely from data, requiring minimal knowledge about the application area. We design a learning scheme to train end-to-end linear dynamical systems (LDS)s by gradient descent in imitation learning robotic domains. We introduce a new regularization loss component together with a learning algorithm that improves the stability of the learned autonomous system, by forcing the eigenvalues of the internal state updates of an LDS to be negative reals. We evaluate our approach on a series of real-life and simulated robotic experiments, in comparison to linear and nonlinear Recurrent Neural Network (RNN) architectures. Our results show that our stabilizing method significantly improves test performance of LDS, enabling such linear models to match the performance of contemporary nonlinear RNN architectures. A video of the obstacle avoidance performance of our method on a mobile robot, in unseen environments, compared to other methods can be viewed at https://youtu.be/mhEsCoNao5E.},
  author       = {Lechner, Mathias and Hasani, Ramin and Rus, Daniela and Grosu, Radu},
  booktitle    = {Proceedings - IEEE International Conference on Robotics and Automation},
  isbn         = {9781728173955},
  issn         = {1050-4729},
  location     = {Paris, France},
  pages        = {5446--5452},
  publisher    = {IEEE},
  title        = {{Gershgorin loss stabilizes the recurrent neural network compartment of an end-to-end robot learning scheme}},
  doi          = {10.1109/ICRA40945.2020.9196608},
  year         = {2020},
}

