Publications | Ruohan Gao

2025

Multisensory Machine Intelligence

Ruohan Gao

AI Magazine, 2025

AAAI-25 New Faculty Highlights
Bib PDF

Differentiable Room Acoustic Rendering with Multi-View Vision Priors

Derong Jin and Ruohan Gao

International Conference on Computer Vision (ICCV), 2025

Oral Presentation
Bib PDF Video Project Page

@inproceedings{jin2025avdar,
  author    = {Jin, Derong and Gao, Ruohan},
  title     = {Differentiable Room Acoustic Rendering with Multi-View Vision Priors},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year      = {2025},
}

EgoAdapt: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception

Sanjoy Chowdhury, Subrata Biswas, Sayan Nag, Tushar Nagarajan, Calvin Murdock, Ishwarya Ananthabhotla, Yijun Qian, Vamsi Krishna Ithapu, Dinesh Manocha, and Ruohan Gao

International Conference on Computer Vision (ICCV), 2025

Bib PDF Project Page

@inproceedings{chowdhury2025egoadapt,
  title = {{EgoAdapt}: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception},
  author = {Chowdhury, Sanjoy and Biswas, Subrata and Nag, Sayan and Nagarajan, Tushar and Murdock, Calvin and Ananthabhotla, Ishwarya and Qian, Yijun and Ithapu, Vamsi Krishna and Manocha, Dinesh and Gao, Ruohan},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2025},
}

GenFlowRL: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning

Kelin Yu*, Sheng Zhang*, Harshit Soora, Furong Huang, Heng Huang, Pratap Tokekar, and Ruohan Gao

International Conference on Computer Vision (ICCV), 2025

Bib PDF Project Page

@inproceedings{yu2025genflowrl,
  title = {{GenFlowRL}: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning},
  author = {Yu, Kelin and Zhang, Sheng and Soora, Harshit and Huang, Furong and Huang, Heng and Tokekar, Pratap and Gao, Ruohan},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2025},
  internal-note = {Asterisk markers on Yu and Zhang moved out of the author field (presumably equal contribution -- confirm)},
}

AVTrustBench: Assessing and Enhancing Reliability and Robustness in Audio-Visual LLMs

Sanjoy Chowdhury*, Sayan Nag*, Subhrajyoti Dasgupta*, Yaoting Wang, Mohamed Elhoseiny, Ruohan Gao, and Dinesh Manocha

International Conference on Computer Vision (ICCV), 2025

Bib PDF Code Dataset Project Page

@inproceedings{chowdhury2025avtrustbench,
  title = {{AVTrustBench}: Assessing and Enhancing Reliability and Robustness in Audio-Visual {LLMs}},
  author = {Chowdhury, Sanjoy and Nag, Sayan and Dasgupta, Subhrajyoti and Wang, Yaoting and Elhoseiny, Mohamed and Gao, Ruohan and Manocha, Dinesh},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2025},
  internal-note = {Asterisk markers on Chowdhury, Nag and Dasgupta moved out of the author field (presumably equal contribution -- confirm)},
}

Aurelia: Test-time Reasoning Distillation in Audio-Visual LLMs

Sanjoy Chowdhury*, Hanan Gani*, Nishit Anand, Sayan Nag, Ruohan Gao, Mohamed Elhoseiny, Salman Khan, and Dinesh Manocha

International Conference on Computer Vision (ICCV), 2025

Bib PDF Code Dataset Project Page

@inproceedings{chowdhury2025aurelia,
  title = {Aurelia: Test-time Reasoning Distillation in Audio-Visual {LLMs}},
  author = {Chowdhury, Sanjoy and Gani, Hanan and Anand, Nishit and Nag, Sayan and Gao, Ruohan and Elhoseiny, Mohamed and Khan, Salman and Manocha, Dinesh},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2025},
  internal-note = {Asterisk markers on Chowdhury and Gani moved out of the author field (presumably equal contribution -- confirm)},
}

Towards Perception-Informed Latent HRTF Representations

You Zhang, Andrew Francl, Ruohan Gao, Paul Calamia, Zhiyao Duan, and Ishwarya Ananthabhotla

IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), 2025

Best Student Paper Award
Bib PDF

@inproceedings{zhang2025towards,
  title = {Towards Perception-Informed Latent {HRTF} Representations},
  author = {Zhang, You and Francl, Andrew and Gao, Ruohan and Calamia, Paul and Duan, Zhiyao and Ananthabhotla, Ishwarya},
  booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
  year = {2025},
}

Hearing Anywhere in Any Environment

Xiulong Liu, Anurag Kumar, Paul Calamia, Sebastià V. Amengual Garí, Calvin Murdock, Ishwarya Ananthabhotla, Philip Robinson, Eli Shlizerman, Vamsi Krishna Ithapu, and Ruohan Gao

Conference on Computer Vision and Pattern Recognition (CVPR), 2025

Bib PDF Code Dataset Project Page

@inproceedings{liu2025haae,
  title = {Hearing Anywhere in Any Environment},
  author = {Liu, Xiulong and Kumar, Anurag and Calamia, Paul and Amengual Garí, Sebastià V. and Murdock, Calvin and Ananthabhotla, Ishwarya and Robinson, Philip and Shlizerman, Eli and Ithapu, Vamsi Krishna and Gao, Ruohan},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2025},
}

Learning to Highlight Audio by Watching Movies

Chao Huang, Ruohan Gao, J. M. F. Tsang, Jan Kurcius, Cagdas Bilen, Chenliang Xu, Anurag Kumar, and Sanjeel Parekh

Conference on Computer Vision and Pattern Recognition (CVPR), 2025

Bib PDF Code Project Page

@inproceedings{huang2025visal,
  author    = {Huang, Chao and Gao, Ruohan and Tsang, J. M. F. and Kurcius, Jan and Bilen, Cagdas and Xu, Chenliang and Kumar, Anurag and Parekh, Sanjeel},
  title     = {Learning to Highlight Audio by Watching Movies},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2025},
}

2024

Spherical World-Locking for Audio-Visual Localization in Egocentric Videos

Heeseung Yun, Ruohan Gao, Ishwarya Ananthabhotla, Anurag Kumar, Jacob Donley, Chao Li, Gunhee Kim, Vamsi Krishna Ithapu, and Calvin Murdock

European Conference on Computer Vision (ECCV), 2024

Bib PDF Supp Project Page

@inproceedings{yun2024swl,
  author    = {Yun, Heeseung and Gao, Ruohan and Ananthabhotla, Ishwarya and Kumar, Anurag and Donley, Jacob and Li, Chao and Kim, Gunhee and Ithapu, Vamsi Krishna and Murdock, Calvin},
  title     = {Spherical World-Locking for Audio-Visual Localization in Egocentric Videos},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2024},
}

Meerkat: Audio-Visual Large Language Model for Grounding in Space and Time

Sanjoy Chowdhury*, Sayan Nag*, Subhrajyoti Dasgupta*, Jun Chen, Mohamed Elhoseiny, Ruohan Gao, and Dinesh Manocha

European Conference on Computer Vision (ECCV), 2024

Bib PDF

@inproceedings{chowdhury2024meerkat,
  title = {Meerkat: Audio-Visual Large Language Model for Grounding in Space and Time},
  author = {Chowdhury, Sanjoy and Nag, Sayan and Dasgupta, Subhrajyoti and Chen, Jun and Elhoseiny, Mohamed and Gao, Ruohan and Manocha, Dinesh},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year = {2024},
  internal-note = {Asterisk markers on Chowdhury, Nag and Dasgupta moved out of the author field (presumably equal contribution -- confirm)},
}

DiffSound: Differentiable Modal Sound Rendering and Inverse Rendering for Diverse Inference Tasks

Xutong Jin*, Chenxi Xu*, Ruohan Gao, Jiajun Wu, Guoping Wang, and Sheng Li

ACM Special Interest Group on Computer Graphics and Interactive Techniques Conference (SIGGRAPH), 2024

Bib PDF Code Project Page

@inproceedings{jin2024diffsound,
  title = {{DiffSound}: Differentiable Modal Sound Rendering and Inverse Rendering for Diverse Inference Tasks},
  author = {Jin, Xutong and Xu, Chenxi and Gao, Ruohan and Wu, Jiajun and Wang, Guoping and Li, Sheng},
  booktitle = {ACM Special Interest Group on Computer Graphics and Interactive Techniques Conference (SIGGRAPH)},
  year = {2024},
  internal-note = {Asterisk markers on Jin and Xu moved out of the author field (presumably equal contribution -- confirm)},
}

Hearing Anything Anywhere

Mason L. Wang*, Ryosuke Sawata*, Samuel Clarke, Ruohan Gao, Shangzhe Wu, and Jiajun Wu

Conference on Computer Vision and Pattern Recognition (CVPR), 2024

Bib PDF Code Dataset Video Project Page

@inproceedings{wang2024haa,
  title = {Hearing Anything Anywhere},
  author = {Wang, Mason L. and Sawata, Ryosuke and Clarke, Samuel and Gao, Ruohan and Wu, Shangzhe and Wu, Jiajun},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2024},
  internal-note = {Asterisk markers on Wang (Mason L.) and Sawata moved out of the author field (presumably equal contribution -- confirm)},
}

The Audio-Visual Conversational Graph: From an Egocentric-Exocentric Perspective

Wenqi Jia, Miao Liu, Hao Jiang, Ishwarya Ananthabhotla, James M Rehg, Vamsi Krishna Ithapu, and Ruohan Gao

Conference on Computer Vision and Pattern Recognition (CVPR), 2024

Bib PDF Supp Code Project Page

@inproceedings{jia2024avgraph,
  title = {The Audio-Visual Conversational Graph: From an Egocentric-Exocentric Perspective},
  author = {Jia, Wenqi and Liu, Miao and Jiang, Hao and Ananthabhotla, Ishwarya and Rehg, James M. and Ithapu, Vamsi Krishna and Gao, Ruohan},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2024},
}

2023

SoundCam: A Dataset for Tasks in Tracking and Identifying Humans from Real Room Acoustics

Mason Wang*, Samuel Clarke*, Jui-Hsien Wang, Ruohan Gao, and Jiajun Wu

Conference on Neural Information Processing Systems Datasets and Benchmarks Track (NeurIPS), 2023

Bib PDF Dataset Video Project Page

@inproceedings{wang2023soundcam,
  title = {{SoundCam}: A Dataset for Tasks in Tracking and Identifying Humans from Real Room Acoustics},
  author = {Wang, Mason and Clarke, Samuel and Wang, Jui-Hsien and Gao, Ruohan and Wu, Jiajun},
  booktitle = {Conference on Neural Information Processing Systems Datasets and Benchmarks Track (NeurIPS)},
  year = {2023},
  internal-note = {Asterisk markers on Wang (Mason) and Clarke moved out of the author field (presumably equal contribution -- confirm)},
}

NOIR: Neural Signal Operated Intelligent Robot for Everyday Activities

Ruohan Zhang*, Sharon Lee*, Minjune Hwang*, Ayano Hiranaka*, Chen Wang, Wensi Ai, Jin Jie Ryan Tan, Shreya Gupta, Yilun Hao, Gabrael Levine, and 4 more authors

Conference on Robot Learning (CoRL), 2023

Bib PDF Project Page

@inproceedings{Lee2023noir,
  title = {{NOIR}: Neural Signal Operated Intelligent Robot for Everyday Activities},
  author = {Zhang, Ruohan and Lee, Sharon and Hwang, Minjune and Hiranaka, Ayano and Wang, Chen and Ai, Wensi and Tan, Jin Jie Ryan and Gupta, Shreya and Hao, Yilun and Levine, Gabrael and Gao, Ruohan and Norcia, Anthony and Fei-Fei, Li and Wu, Jiajun},
  booktitle = {Conference on Robot Learning (CoRL)},
  year = {2023},
  internal-note = {Asterisk markers on Zhang, Lee, Hwang and Hiranaka moved out of the author field (presumably equal contribution -- confirm)},
}

Visually-Guided Audio Spatialization in Video with Geometry-Aware Multi-task Learning

Rishabh Garg, Ruohan Gao, and Kristen Grauman

International Journal of Computer Vision (IJCV), 2023

Special Issue for Best Papers of BMVC
Bib PDF Dataset Project Page

@article{garg2023visually,
  title = {Visually-Guided Audio Spatialization in Video with Geometry-Aware Multi-task Learning},
  author = {Garg, Rishabh and Gao, Ruohan and Grauman, Kristen},
  journal = {International Journal of Computer Vision (IJCV)},
  year = {2023},
}

The ObjectFolder Benchmark: Multisensory Object-Centric Learning with Neural and Real Objects

Ruohan Gao*, Yiming Dou*, Hao Li*, Tanmay Agarwal, Jeannette Bohg, Yunzhu Li, Li Fei-Fei, and Jiajun Wu

Conference on Computer Vision and Pattern Recognition (CVPR), 2023

Bib PDF Code Video Project Page Interactive Demo

@inproceedings{gao2023ObjectFolderBM,
  title = {The {ObjectFolder} Benchmark: Multisensory Object-Centric Learning with Neural and Real Objects},
  author = {Gao, Ruohan and Dou, Yiming and Li, Hao and Agarwal, Tanmay and Bohg, Jeannette and Li, Yunzhu and Fei-Fei, Li and Wu, Jiajun},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2023},
  internal-note = {Asterisk markers on Gao, Dou and Li (Hao) moved out of the author field (presumably equal contribution -- confirm)},
}

RealImpact: A Dataset of Impact Sound Fields for Real Objects

Samuel Clarke, Ruohan Gao, Mason Wang, Mark Rau, Julia Xu, Jui-Hsien Wang, Doug James, and Jiajun Wu

Conference on Computer Vision and Pattern Recognition (CVPR), 2023

Highlight Paper
Bib PDF Supp Code Video Project Page

@inproceedings{clarke2023realimpact,
  title = {{RealImpact}: A Dataset of Impact Sound Fields for Real Objects},
  author = {Clarke, Samuel and Gao, Ruohan and Wang, Mason and Rau, Mark and Xu, Julia and Wang, Jui-Hsien and James, Doug and Wu, Jiajun},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2023},
}

Learning Object-Centric Neural Scattering Functions for Free-Viewpoint Relighting and Scene Composition

Hong-Xing Yu*, Michelle Guo*, Alireza Fathi, Yen-Yu Chang, Eric Ryan Chan, Ruohan Gao, Thomas Funkhouser, and Jiajun Wu

Transactions on Machine Learning Research (TMLR), 2023

Bib PDF Supp Code Video Project Page

@article{yu2023osf,
  title = {Learning Object-Centric Neural Scattering Functions for Free-Viewpoint Relighting and Scene Composition},
  author = {Yu, Hong-Xing and Guo, Michelle and Fathi, Alireza and Chang, Yen-Yu and Chan, Eric Ryan and Gao, Ruohan and Funkhouser, Thomas and Wu, Jiajun},
  journal = {Transactions on Machine Learning Research (TMLR)},
  year = {2023},
  internal-note = {Asterisk markers on Yu and Guo moved out of the author field (presumably equal contribution -- confirm)},
}

Differentiable Physics Simulation of Dynamics-Augmented Neural Objects

Simon Le Cleac’h, Hong-Xing Yu, Michelle Guo, Taylor A. Howell, Ruohan Gao, Jiajun Wu, Zachary Manchester, and Mac Schwager

Robotics and Automation Letters (RA-L), 2023

Bib PDF Video

@article{simon2023ral,
  title = {Differentiable Physics Simulation of Dynamics-Augmented Neural Objects},
  author = {Le Cleac'h, Simon and Yu, Hong-Xing and Guo, Michelle and Howell, Taylor A. and Gao, Ruohan and Wu, Jiajun and Manchester, Zachary and Schwager, Mac},
  journal = {Robotics and Automation Letters (RA-L)},
  year = {2023},
}

Sonicverse: A Multisensory Simulation Platform for Training Household Agents that See and Hear

Ruohan Gao*, Hao Li*, Gokul Dharan, Zhuzhu Wang, Chengshu Li, Fei Xia, Silvio Savarese, Li Fei-Fei, and Jiajun Wu

International Conference on Robotics and Automation (ICRA), 2023

Bib PDF Code Video Project Page Media Coverage

@inproceedings{gao2023sonicverse,
  title = {Sonicverse: A Multisensory Simulation Platform for Training Household Agents that See and Hear},
  author = {Gao, Ruohan and Li, Hao and Dharan, Gokul and Wang, Zhuzhu and Li, Chengshu and Xia, Fei and Savarese, Silvio and Fei-Fei, Li and Wu, Jiajun},
  booktitle = {International Conference on Robotics and Automation (ICRA)},
  year = {2023},
  internal-note = {Asterisk markers on Gao and Li (Hao) moved out of the author field (presumably equal contribution -- confirm)},
}

An Extensible Multi-modal Multi-task Object Dataset with Materials

Trevor Scott Standley, Ruohan Gao, Dawn Chen, Jiajun Wu, and Silvio Savarese

International Conference on Learning Representations (ICLR), 2023

Bib PDF Dataset Project Page Interactive Demo

@inproceedings{standley2023emma,
  author    = {Standley, Trevor Scott and Gao, Ruohan and Chen, Dawn and Wu, Jiajun and Savarese, Silvio},
  title     = {An Extensible Multi-modal Multi-task Object Dataset with Materials},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2023},
}

2022

See, Hear, and Feel: Smart Sensory Fusion for Robotic Manipulation

Hao Li*, Yizhi Zhang*, Junzhe Zhu, Shaoxiong Wang, Michelle A. Lee, Huazhe Xu, Edward Adelson, Li Fei-Fei, Ruohan Gao†, and Jiajun Wu†

Conference on Robot Learning (CoRL), 2022

Bib PDF Supp Video Project Page

@inproceedings{li2022seehearfeel,
  title = {See, Hear, and Feel: Smart Sensory Fusion for Robotic Manipulation},
  author = {Li, Hao and Zhang, Yizhi and Zhu, Junzhe and Wang, Shaoxiong and Lee, Michelle A. and Xu, Huazhe and Adelson, Edward and Fei-Fei, Li and Gao, Ruohan and Wu, Jiajun},
  booktitle = {Conference on Robot Learning (CoRL)},
  year = {2022},
  internal-note = {Asterisk markers on Li (Hao) and Zhang and dagger markers on Gao and Wu moved out of the author field (presumably equal contribution and equal advising -- confirm)},
}

ObjectFolder 2.0: A Multisensory Object Dataset for Sim2Real Transfer

Ruohan Gao*, Zilin Si*, Yen-Yu Chang*, Samuel Clarke, Jeannette Bohg, Li Fei-Fei, Wenzhen Yuan, and Jiajun Wu

Conference on Computer Vision and Pattern Recognition (CVPR), 2022

Bib PDF Supp Dataset Project Page

@inproceedings{gao2022ObjectFolderV2,
  title = {{ObjectFolder} 2.0: A Multisensory Object Dataset for {Sim2Real} Transfer},
  author = {Gao, Ruohan and Si, Zilin and Chang, Yen-Yu and Clarke, Samuel and Bohg, Jeannette and Fei-Fei, Li and Yuan, Wenzhen and Wu, Jiajun},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2022},
  internal-note = {Asterisk markers on Gao, Si and Chang moved out of the author field (presumably equal contribution -- confirm)},
}

Visual Acoustic Matching

Changan Chen, Ruohan Gao, Paul Calamia, and Kristen Grauman

Conference on Computer Vision and Pattern Recognition (CVPR), 2022

Oral Presentation
Bib PDF Code Project Page Media Coverage

@inproceedings{chen2022visual,
  author    = {Chen, Changan and Gao, Ruohan and Calamia, Paul and Grauman, Kristen},
  title     = {Visual Acoustic Matching},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2022},
}

2021

ObjectFolder: A Dataset of Objects with Implicit Visual, Auditory, and Tactile Representations

Ruohan Gao, Yen-Yu Chang, Shivani Mall, Li Fei-Fei, and Jiajun Wu

Conference on Robot Learning (CoRL), 2021

Bib PDF Supp Dataset Project Page

@inproceedings{gao2021ObjectFolder,
  title = {{ObjectFolder}: A Dataset of Objects with Implicit Visual, Auditory, and Tactile Representations},
  author = {Gao, Ruohan and Chang, Yen-Yu and Mall, Shivani and Fei-Fei, Li and Wu, Jiajun},
  booktitle = {Conference on Robot Learning (CoRL)},
  year = {2021},
}

DiffImpact: Differentiable Rendering and Identification of Impact Sounds

Samuel Clarke, Negin Heravi, Mark Rau, Ruohan Gao, Jiajun Wu, Doug James, and Jeannette Bohg

Conference on Robot Learning (CoRL), 2021

Oral Presentation
Bib PDF Supp Code Video Project Page

@inproceedings{clarke2021diffimpact,
  title = {{DiffImpact}: Differentiable Rendering and Identification of Impact Sounds},
  author = {Clarke, Samuel and Heravi, Negin and Rau, Mark and Gao, Ruohan and Wu, Jiajun and James, Doug and Bohg, Jeannette},
  booktitle = {Conference on Robot Learning (CoRL)},
  year = {2021},
}

Geometry-Aware Multi-Task Learning for Binaural Audio Generation from Video

Rishabh Garg, Ruohan Gao, and Kristen Grauman

British Machine Vision Conference (BMVC), 2021

Best Paper Award Runner-Up
Bib PDF Supp Dataset Project Page

@inproceedings{garg2021geometry,
  author    = {Garg, Rishabh and Gao, Ruohan and Grauman, Kristen},
  title     = {Geometry-Aware Multi-Task Learning for Binaural Audio Generation from Video},
  booktitle = {British Machine Vision Conference (BMVC)},
  year      = {2021},
}

Look and Listen: From Semantic to Spatial Audio-Visual Perception

Ruohan Gao

Ph.D. Dissertation, 2021

Michael H. Granof Award, UT Austin’s Top 1 Doctoral Dissertation
Bib PDF Media Coverage

VisualVoice: Audio-Visual Speech Separation with Cross-Modal Consistency

Ruohan Gao and Kristen Grauman

Conference on Computer Vision and Pattern Recognition (CVPR), 2021

Bib PDF Supp Code Project Page Media Coverage

@inproceedings{gao2021visualvoice,
  title = {{VisualVoice}: Audio-Visual Speech Separation with Cross-Modal Consistency},
  author = {Gao, Ruohan and Grauman, Kristen},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2021},
}

Learning to Set Waypoints for Audio-Visual Navigation

Changan Chen, Sagnik Majumder, Ziad Al-Halah, Ruohan Gao, Santhosh Kumar Ramakrishnan, and Kristen Grauman

International Conference on Learning Representations (ICLR), 2021

Bib PDF Code Project Page

@inproceedings{chen2021waypoints,
  author    = {Chen, Changan and Majumder, Sagnik and Al-Halah, Ziad and Gao, Ruohan and Ramakrishnan, Santhosh Kumar and Grauman, Kristen},
  title     = {Learning to Set Waypoints for Audio-Visual Navigation},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2021},
}

2020

VisualEchoes: Spatial Visual Representation Learning through Echolocation

Ruohan Gao, Changan Chen, Ziad Al-Halah, Carl Schissler, and Kristen Grauman

European Conference on Computer Vision (ECCV), 2020

Bib PDF Supp Dataset Project Page

@inproceedings{gao2020visualechoes,
  title = {{VisualEchoes}: Spatial Visual Representation Learning through Echolocation},
  author = {Gao, Ruohan and Chen, Changan and Al-Halah, Ziad and Schissler, Carl and Grauman, Kristen},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year = {2020},
}

Listen to Look: Action Recognition by Previewing Audio

Ruohan Gao, Tae-Hyun Oh, Kristen Grauman, and Lorenzo Torresani

Conference on Computer Vision and Pattern Recognition (CVPR), 2020

Bib PDF Supp Code Poster Project Page

@inproceedings{gao2020listentolook,
  author    = {Gao, Ruohan and Oh, Tae-Hyun and Grauman, Kristen and Torresani, Lorenzo},
  title     = {Listen to Look: Action Recognition by Previewing Audio},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2020},
}

2019

Co-Separating Sounds of Visual Objects

Ruohan Gao and Kristen Grauman

International Conference on Computer Vision (ICCV), 2019

Bib PDF Supp Code Poster Project Page

@inproceedings{gao2019coseparation,
  author    = {Gao, Ruohan and Grauman, Kristen},
  title     = {Co-Separating Sounds of Visual Objects},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year      = {2019},
}

2.5D Visual Sound

Ruohan Gao and Kristen Grauman

Conference on Computer Vision and Pattern Recognition (CVPR), 2019

Best Paper Award Finalist
Bib PDF Supp Code Dataset Video Project Page Media Coverage

@inproceedings{gao2019visual-sound,
  title = {{2.5D} Visual Sound},
  author = {Gao, Ruohan and Grauman, Kristen},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2019},
}

2018

Learning to Separate Object Sounds by Watching Unlabeled Video

Ruohan Gao, Rogerio Feris, and Kristen Grauman

European Conference on Computer Vision (ECCV), 2018

Oral Presentation
Bib PDF Supp Code Video Poster Project Page

@inproceedings{gao2018object-sounds,
  author    = {Gao, Ruohan and Feris, Rogerio and Grauman, Kristen},
  title     = {Learning to Separate Object Sounds by Watching Unlabeled Video},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2018},
}

ShapeCodes: Self-Supervised Feature Learning by Lifting Views to Viewgrids

Dinesh Jayaraman, Ruohan Gao, and Kristen Grauman

European Conference on Computer Vision (ECCV), 2018

Bib PDF Supp

@inproceedings{jayaraman2018shape,
  title = {{ShapeCodes}: Self-Supervised Feature Learning by Lifting Views to Viewgrids},
  author = {Jayaraman, Dinesh and Gao, Ruohan and Grauman, Kristen},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year = {2018},
}

Im2Flow: Motion Hallucination from Static Images for Action Recognition

Ruohan Gao, Bo Xiong, and Kristen Grauman

Conference on Computer Vision and Pattern Recognition (CVPR), 2018

Oral Presentation
Bib PDF Supp Code Video Poster Project Page

@inproceedings{gao2018im2flow,
  title = {{Im2Flow}: Motion Hallucination from Static Images for Action Recognition},
  author = {Gao, Ruohan and Xiong, Bo and Grauman, Kristen},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2018},
}

2017

On-Demand Learning for Deep Image Restoration

Ruohan Gao and Kristen Grauman

International Conference on Computer Vision (ICCV), 2017

Bib PDF Supp Code Poster Project Page

@inproceedings{gao2017on-demand,
  author    = {Gao, Ruohan and Grauman, Kristen},
  title     = {On-Demand Learning for Deep Image Restoration},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year      = {2017},
}

2016

Object-Centric Representation Learning from Unlabeled Videos

Ruohan Gao, Dinesh Jayaraman, and Kristen Grauman

Asian Conference on Computer Vision (ACCV), 2016

Bib PDF Poster Project Page

@inproceedings{gao2016object-centric,
  author    = {Gao, Ruohan and Jayaraman, Dinesh and Grauman, Kristen},
  title     = {Object-Centric Representation Learning from Unlabeled Videos},
  booktitle = {Asian Conference on Computer Vision (ACCV)},
  year      = {2016},
}

IEEE ICC

Accelerating Graph Mining Algorithms via Uniform Random Edge Sampling

Ruohan Gao, Huanle Xu, Pili Hu, and Wing Cheong Lau

IEEE International Conference on Communications (ICC), 2016

Bib PDF

@inproceedings{gao2016accelerating,
  title = {Accelerating Graph Mining Algorithms via Uniform Random Edge Sampling},
  author = {Gao, Ruohan and Xu, Huanle and Hu, Pili and Lau, Wing Cheong},
  booktitle = {IEEE International Conference on Communications (ICC)},
  year = {2016},
}

2015

IEEE GLOBECOM

Graph Property Preservation under Community-Based Sampling

Ruohan Gao, Pili Hu, and Wing Cheong Lau

IEEE Global Communications Conference (GLOBECOM), 2015

Bib PDF

@inproceedings{gao2015graph,
  title = {Graph Property Preservation under Community-Based Sampling},
  author = {Gao, Ruohan and Hu, Pili and Lau, Wing Cheong},
  booktitle = {IEEE Global Communications Conference (GLOBECOM)},
  year = {2015},
}