% kausik2023tom -- "Transition Occupancy Matching" (TOM) paper, L4DC, Jun 2023.
% Besides the standard fields, this record carries custom ones (pub_type and
% the url_* family) that stock BibTeX styles ignore.
% NOTE(review): pub_type says "conference", yet the entry type is "article"
% with the venue acronym in the journal field. Under a stock BibTeX style this
% would normally be an inproceedings entry with a booktitle; left unchanged
% because whatever consumes the custom fields may depend on the current
% layout -- confirm before changing.
% NOTE(review): url and url_arxiv hold the same arXiv link; presumably
% intentional (generic link plus a dedicated arXiv link) -- verify.
@article{kausik2023tom,
  author      = {Yecheng Jason Ma and Kausik Sivakumar and Jason Yen and Osbert Bastani and Dinesh Jayaraman},
  title       = {Learning Policy-Aware Models for Model-Based Reinforcement Learning via Transition Occupancy Matching},
  journal     = {L4DC},
  year        = {2023},
  month       = {Jun},
  pub_type    = {conference},
  abstract    = {Standard model-based reinforcement learning (MBRL) approaches fit a transition model of the environment to all past experience, but this wastes model capacity on data that is irrelevant for policy improvement. We instead propose a new "transition occupancy matching" (TOM) objective for MBRL model learning: a model is good to the extent that the current policy experiences the same distribution of transitions inside the model as in the real environment. We derive TOM directly from a novel lower bound on the standard reinforcement learning objective. To optimize TOM, we show how to reduce it to a form of importance weighted maximum-likelihood estimation, where the automatically computed importance weights identify policy-relevant past experiences from a replay buffer, enabling stable optimization. TOM thus offers a plug-and-play model learning sub-routine that is compatible with any backbone MBRL algorithm. On various Mujoco continuous robotic control tasks, we show that TOM successfully focuses model learning on policy-relevant experience and drives policies faster to higher task rewards than alternative model learning approaches.},
  url_pdf     = {/publication/kausik-2023-tom/kausik-2023-tom.pdf},
  url_project = {https://penn-pal-lab.github.io/TOM/},
  url_code    = {https://github.com/kausiksivakumar/TOM},
  url         = {https://arxiv.org/abs/2305.12663},
  url_arxiv   = {https://arxiv.org/abs/2305.12663},
}
