Skip to content

Commit 016b6a6

Browse files
committed
add more thumbnails
1 parent ca459b2 commit 016b6a6

File tree

8 files changed

+129
-0
lines changed

8 files changed

+129
-0
lines changed

src/components/publications.json

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,133 @@
11
[
2+
{
3+
"title": "Perception Tokens Enhance Visual Reasoning in Multimodal Language Models",
4+
"authors": [
5+
"Mahtab Bigverdi",
6+
"Zelun Luo",
7+
"Cheng-Yu Hsieh",
8+
"Ethan Shen",
9+
"Dongping Chen",
10+
"Linda G. Shapiro",
11+
"Ranjay Krishna"
12+
],
13+
"year": "2024",
14+
"links": {
15+
"pdf": "https://arxiv.org/abs/2412.03548"
16+
},
17+
"thumbnail": "/perctokens.png"
18+
},
19+
{
20+
"title": "REALEDIT: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations",
21+
"authors": [
22+
"Peter Sushko",
23+
"Ayana Bharadwaj",
24+
"Zhi Yang Lim",
25+
"Vasily Ilin",
26+
"Ben Caffee",
27+
"Dongping Chen",
28+
"Mohammadreza Salehi",
29+
"Cheng-Yu Hsieh",
30+
"Ranjay Krishna"
31+
],
32+
"year": "preprint",
33+
"links": {
34+
"pdf": "https://arxiv.org/abs/2502.03629"
35+
},
36+
"thumbnail": "/realedit.png"
37+
},
38+
{
39+
"title": "Learning to Build by Building Your Own Instructions",
40+
"authors": [
41+
"Aaron Walsman",
42+
"Muru Zhang",
43+
"Adam Fishman",
44+
"Ali Farhadi",
45+
"Dieter Fox"
46+
],
47+
"year": "2024",
48+
"links": {
49+
"pdf": "https://arxiv.org/abs/2410.01111"
50+
},
51+
"thumbnail": "/build.png"
52+
},
53+
{
54+
"title": "ActionAtlas: A VideoQA Benchmark for Domain-specialized Action Recognition",
55+
"authors": [
56+
"Mohammadreza Salehi",
57+
"Jae Sung Park",
58+
"Tanush Yadav",
59+
"Aditya Kusupati",
60+
"Ranjay Krishna",
61+
"Yejin Choi",
62+
"Hannaneh Hajishirzi",
63+
"Ali Farhadi"
64+
],
65+
"year": "2024",
66+
"venue": "NeurIPS 2024 Datasets and Benchmarks",
67+
"links": {
68+
"pdf": "https://arxiv.org/abs/2410.05774"
69+
},
70+
"thumbnail": "/actionatlas.png"
71+
},
72+
{
73+
"title": "From an Image to a Scene: Learning to Imagine the World from a Million 360 Videos",
74+
"authors": [
75+
"Matthew Wallingford",
76+
"Anand Bhattad",
77+
"Aditya Kusupati",
78+
"Vivek Ramanujan",
79+
"Matt Deitke",
80+
"Sham Kakade",
81+
"Aniruddha Kembhavi",
82+
"Roozbeh Mottaghi",
83+
"Wei-Chiu Ma",
84+
"Ali Farhadi"
85+
],
86+
"year": "2024",
87+
"venue": "NeurIPS 2024",
88+
"links": {
89+
"pdf": "https://arxiv.org/abs/2412.07770"
90+
},
91+
"thumbnail": "/360.png"
92+
},
93+
{
94+
"title": "The One RING: a Robotic Indoor Navigation Generalist",
95+
"authors": [
96+
"Ainaz Eftekhar",
97+
"Luca Weihs",
98+
"Rose Hendrix",
99+
"Ege Caglar",
100+
"Jordi Salvador",
101+
"Alvaro Herrasti",
102+
"Winson Han",
103+
"Eli VanderBilt",
104+
"Aniruddha Kembhavi",
105+
"Ali Farhadi",
106+
"Ranjay Krishna",
107+
"Kiana Ehsani",
108+
"Kuo-Hao Zeng"
109+
],
110+
"year": "preprint",
111+
"links": {
112+
"pdf": "https://arxiv.org/abs/2412.14401"
113+
},
114+
"thumbnail": "/ring.png"
115+
},
116+
{
117+
"title": "When Worse is Better: Navigating the compression-generation tradeoff in visual tokenization",
118+
"authors": [
119+
"Vivek Ramanujan",
120+
"Kushal Tirumala",
121+
"Armen Aghajanyan",
122+
"Luke Zettlemoyer",
123+
"Ali Farhadi"
124+
],
125+
"year": "preprint",
126+
"links": {
127+
"pdf": "https://arxiv.org/abs/2412.16326"
128+
},
129+
"thumbnail": "/worse.png"
130+
},
2131
{
3132
"title": "Offline Training of Language Model Agents with Functions as Learnable Weights",
4133
"authors": [

static/360.png

384 KB
Loading

static/actionatlas.png

325 KB
Loading

static/build.png

142 KB
Loading

static/perctokens.png

148 KB
Loading

static/realedit.png

86.5 KB
Loading

static/ring.png

462 KB
Loading

static/worse.png

119 KB
Loading

0 commit comments

Comments
 (0)