% LAVISH preprint: "Vision Transformers are Parameter-Efficient Audio-Visual Learners".
% Author names must be separated by the literal word " and "; any other token
% (such as a misspelled "an") makes BibTeX merge the neighbouring names into one.
% NOTE(review): booktitle is set to "arXiv" on an InProceedings entry; if the arXiv
% identifier is known, consider recording it via eprint/archivePrefix instead -- TODO confirm.
@InProceedings{LAVISH_arxiv2022,
  author    = {Lin, Yan-Bo and Sung, Yi-Lin and Lei, Jie and Bansal, Mohit and Bertasius, Gedas},
  title     = {Vision Transformers are Parameter-Efficient Audio-Visual Learners},
  booktitle = {arXiv},
  year      = {2022},
}