Publications Search
Akanksha Atrey, Kaleigh Clary, David Jensen
Exploratory Not Explanatory: Counterfactual Analysis of Saliency Maps for Deep Reinforcement Learning Proceedings Article
In: International Conference on Learning Representations, 2020.
Abstract | Links | BibTeX | Tags: Explainable AI
@inproceedings{atrey2020exploratory,
  author    = {Akanksha Atrey and Kaleigh Clary and David Jensen},
  title     = {Exploratory Not Explanatory: Counterfactual Analysis of Saliency Maps for Deep Reinforcement Learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  date      = {2020-01-01},
  url       = {https://openreview.net/pdf?id=rkl3m1BFDB},
  abstract  = {Saliency maps are frequently used to support explanations of the behavior of deep reinforcement learning (RL) agents. However, a review of how saliency maps are used in practice indicates that the derived explanations are often unfalsifiable and can be highly subjective. We introduce an empirical approach grounded in counterfactual reasoning to test the hypotheses generated from saliency maps and assess the degree to which they correspond to the semantics of RL environments. We use Atari games, a common benchmark for deep RL, to evaluate three types of saliency maps. Our results show the extent to which existing claims about Atari games can be evaluated and suggest that saliency maps are best viewed as an exploratory tool rather than an explanatory tool.},
  keywords  = {Explainable AI},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Emma Tosch, Kaleigh Clary, John Foley, David Jensen
Toybox: A Suite of Environments for Experimental Evaluation of Deep Reinforcement Learning Miscellaneous
2019.
Abstract | Links | BibTeX | Tags: Explainable AI
@misc{DBLP:journals/corr/abs-1905-02825,
  title         = {{Toybox}: A Suite of Environments for Experimental Evaluation of Deep Reinforcement Learning},
  author        = {Emma Tosch and Kaleigh Clary and John Foley and David Jensen},
  url           = {http://arxiv.org/abs/1905.02825},
  year          = {2019},
  date          = {2019-01-01},
  eprint        = {1905.02825},
  archiveprefix = {arXiv},
  abstract      = {Evaluation of deep reinforcement learning (RL) is inherently challenging. In particular, learned policies are largely opaque, and hypotheses about the behavior of deep RL agents are difficult to test in black-box environments. Considerable effort has gone into addressing opacity, but almost no effort has been devoted to producing high quality environments for experimental evaluation of agent behavior. We present TOYBOX, a new high-performance, open-source* subset of Atari environments re-designed for the experimental evaluation of deep RL. We show that TOYBOX enables a wide range of experiments and analyses that are impossible in other environments.},
  keywords      = {Explainable AI},
  pubstate      = {published},
  tppubtype     = {misc}
}
Kaleigh Clary, Emma Tosch, John Foley, David Jensen
Let's Play Again: Variability of Deep Reinforcement Learning Agents in Atari Environments Miscellaneous
2018.
Abstract | Links | BibTeX | Tags: Explainable AI
@misc{clary2018variability,
  title         = {Let's Play Again: Variability of Deep Reinforcement Learning Agents in {Atari} Environments},
  author        = {Kaleigh Clary and Emma Tosch and John Foley and David Jensen},
  url           = {https://arxiv.org/pdf/1904.06312},
  year          = {2018},
  date          = {2018-01-01},
  eprint        = {1904.06312},
  archiveprefix = {arXiv},
  booktitle     = {Critiquing and Correcting Trends in Machine Learning Workshop at Neural Information Processing Systems},
  abstract      = {Reproducibility in reinforcement learning is challenging: uncontrolled stochasticity from many sources, such as the learning algorithm, the learned policy, and the environment itself have led researchers to report the performance of learned agents using aggregate metrics of performance over multiple random seeds for a single environment. Unfortunately, there are still pernicious sources of variability in reinforcement learning agents that make reporting common summary statistics an unsound metric for performance. Our experiments demonstrate the variability of common agents used in the popular OpenAI Baselines repository. We make the case for reporting post-training agent performance as a distribution, rather than a point estimate.},
  keywords      = {Explainable AI},
  pubstate      = {published},
  tppubtype     = {misc}
}
Sam Witty, Jun Ki Lee, Emma Tosch, Akanksha Atrey, Michael Littman, David Jensen
Measuring and characterizing generalization in deep reinforcement learning Miscellaneous
2018.
Abstract | Links | BibTeX | Tags: Explainable AI
@misc{witty2018measuring,
  title         = {Measuring and Characterizing Generalization in Deep Reinforcement Learning},
  author        = {Sam Witty and Jun Ki Lee and Emma Tosch and Akanksha Atrey and Michael Littman and David Jensen},
  url           = {https://arxiv.org/pdf/1812.02868.pdf},
  year          = {2018},
  date          = {2018-01-01},
  eprint        = {1812.02868},
  archiveprefix = {arXiv},
  abstract      = {Deep reinforcement-learning methods have achieved remarkable performance on challenging control tasks. Observations of the resulting behavior give the impression that the agent has constructed a generalized representation that supports insightful action decisions. We re-examine what is meant by generalization in RL, and propose several definitions based on an agent's performance in on-policy, off-policy, and unreachable states. We propose a set of practical methods for evaluating agents with these definitions of generalization. We demonstrate these techniques on a common benchmark task for deep RL, and we show that the learned networks make poor decisions for states that differ only slightly from on-policy states, even though those states are not selected adversarially. Taken together, these results call into question the extent to which deep Q-networks learn generalized representations, and suggest that more experimentation and analysis is necessary before claims of representation learning can be supported.},
  keywords      = {Explainable AI},
  pubstate      = {published},
  tppubtype     = {misc}
}
John Foley, Emma Tosch, Kaleigh Clary, David Jensen
Toybox: Better Atari Environments for Testing Reinforcement Learning Agents Proceedings Article
In: NeurIPS 2018 Workshop on Systems for ML, 2018.
Abstract | Links | BibTeX | Tags: Explainable AI
@inproceedings{foley2018toybox,
  title     = {{Toybox}: Better {Atari} Environments for Testing Reinforcement Learning Agents},
  author    = {John Foley and Emma Tosch and Kaleigh Clary and David Jensen},
  url       = {http://learningsys.org/nips18/assets/papers/83CameraReadySubmissionNIPS_Systems_for_ML_Workshop_2019___ToyBox%20(11).pdf},
  year      = {2018},
  date      = {2018-01-01},
  booktitle = {{NeurIPS} 2018 Workshop on Systems for {ML}},
  abstract  = {It is a widely accepted principle that software without tests has bugs. Testing reinforcement learning agents is especially difficult because of the stochastic nature of both agents and environments, the complexity of state-of-the-art models,and the sequential nature of their predictions. Recently, the Arcade Learning Environment (ALE) has become one of the most widely used benchmark suites for deep learning research, and state-of-the-art Reinforcement Learning (RL) agents have been shown to routinely equal or exceed human performance on many ALE tasks. Since ALE is based on emulation of original Atari games, the environment does not provide semantically meaningful representations of internal game state. This means that ALE has limited utility as an environment for supporting testing or model introspection. We propose TOYBOX, a collection of reimplementations of these games that solves this critical problem and enables robust testing of RL agents.},
  keywords  = {Explainable AI},
  pubstate  = {published},
  tppubtype = {inproceedings}
}