<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<!-- Meta tags for social media banners; these should be filled in appropriately, as they are your "business card" -->
<meta name="description" content="Co-MTP: a cooperative trajectory prediction framework with multi-temporal fusion for autonomous driving (ICRA 2025).">
<meta property="og:title" content="Co-MTP: A Cooperative Trajectory Prediction Framework with Multi-Temporal Fusion for Autonomous Driving"/>
<meta property="og:description" content="Co-MTP leverages V2X cooperation to fuse history and future information for trajectory prediction in autonomous driving."/>
<meta property="og:url" content="URL OF THE WEBSITE"/>
<!-- Path to banner image; should be at the path listed below. Optimal dimensions are 1200x630 -->
<meta property="og:image" content="static/images/your_banner_image.png" />
<meta property="og:image:width" content="1200"/>
<meta property="og:image:height" content="630"/>
<meta name="twitter:title" content="Co-MTP: Cooperative Trajectory Prediction with Multi-Temporal Fusion">
<meta name="twitter:description" content="Co-MTP leverages V2X cooperation to fuse history and future information for trajectory prediction in autonomous driving.">
<!-- Path to banner image; should be at the path listed below. Optimal dimensions are 1200x600 -->
<meta name="twitter:image" content="static/images/your_twitter_banner_image.png">
<meta name="twitter:card" content="summary_large_image">
<!-- Keywords for your paper to be indexed by -->
<meta name="keywords" content="cooperative trajectory prediction, V2X, multi-temporal fusion, autonomous driving">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Co-MTP: A Cooperative Trajectory Prediction Framework with Multi-Temporal Fusion for Autonomous Driving</title>
<link rel="icon" type="image/x-icon" href="static/images/favicon.ico">
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
rel="stylesheet">
<link rel="stylesheet" href="static/css/bulma.min.css">
<link rel="stylesheet" href="static/css/bulma-carousel.min.css">
<link rel="stylesheet" href="static/css/bulma-slider.min.css">
<link rel="stylesheet" href="static/css/fontawesome.all.min.css">
<link rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<link rel="stylesheet" href="static/css/index.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
<script defer src="static/js/fontawesome.all.min.js"></script>
<script src="static/js/bulma-carousel.min.js"></script>
<script src="static/js/bulma-slider.min.js"></script>
<script src="static/js/index.js"></script>
</head>
<body>
<style>
  .custom-container {
    max-width: 80%; /* cap width at 80% of the page */
  }
  .full-container {
    max-width: 100%; /* allow the full page width */
  }
</style>
<section class="hero">
<div class="hero-body">
<div class="container full-container">
<div class="columns is-centered">
<div class="column has-text-centered">
<h1 class="title is-1 publication-title">Co-MTP: A Cooperative Trajectory Prediction Framework with Multi-Temporal Fusion for Autonomous Driving</h1>
<div class="is-size-5 publication-authors">
<!-- Paper authors -->
<span class="author-block">
<a href="FIRST AUTHOR PERSONAL LINK" target="_blank" rel="noopener noreferrer">Xinyu Zhang</a><sup>*</sup>,</span>
<span class="author-block">
<a href="https://zewei-zhou.github.io/" target="_blank" rel="noopener noreferrer">Zewei Zhou</a><sup>*</sup>,</span>
<span class="author-block">
<a href="https://wi11ione.github.io/" target="_blank" rel="noopener noreferrer">Zhaoyi Wang</a>,</span>
<span class="author-block">
<a href="FOURTH AUTHOR PERSONAL LINK" target="_blank" rel="noopener noreferrer">Yangjie Ji</a>,</span>
<span class="author-block">
<a href="https://www.linkedin.com/in/yanjun-huang-46099b82/" target="_blank" rel="noopener noreferrer">Yanjun Huang</a>,</span>
<span class="author-block">
<a href="https://scholar.google.com/citations?hl=zh-CN&user=n_eA148AAAAJ" target="_blank" rel="noopener noreferrer">Hong Chen</a>
</span>
</div>
<div class="is-size-5 publication-authors">
<span class="author-block">Tongji University</span>
<span class="eql-cntrb"><small><br><sup>*</sup>Indicates Equal Contribution</small></span>
</div>
<div class="has-text-centered" style="margin-top: 10px;">
<strong class="is-size-4" style="color: red;">ICRA 2025</strong>
</div>
<div class="column has-text-centered">
<div class="publication-links">
<!-- Arxiv PDF link -->
<span class="link-block">
<a href="static/pdfs/ICRA_cooperative_prediction.pdf" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fas fa-file-pdf"></i>
</span>
<span>Paper</span>
</a>
</span>
<!-- Supplementary PDF link -->
<!-- <span class="link-block">
<a href="static/pdfs/supplementary_material.pdf" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fas fa-file-pdf"></i>
</span>
<span>Supplementary</span>
</a>
</span> -->
<!-- Github link -->
<span class="link-block">
<a href="https://github.com/xiaomiaozhang/Co-MTP" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Code</span>
</a>
</span>
<!-- ArXiv abstract Link -->
<span class="link-block">
<a href="https://arxiv.org/abs/2502.16589" target="_blank" rel="noopener noreferrer" class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<span class="ai ai-arxiv"></span>
</span>
<span>arXiv</span>
</a>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
<!-- Paper abstract -->
<section class="section hero is-light">
<div class="container custom-container">
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">Abstract</h2>
<div class="content has-text-justified">
<p>
Vehicle-to-everything (V2X) technologies have become an ideal paradigm for extending the perception range and seeing through occlusions. Existing efforts focus on single-frame cooperative perception; however, how to capture the temporal cues between frames with V2X, so as to facilitate the prediction task and even the planning task, remains underexplored. In this paper, we introduce Co-MTP, a general cooperative trajectory prediction framework with multi-temporal fusion for autonomous driving, which leverages the V2X system to fully capture the interactions among agents in both the history and future domains to benefit planning. In the history domain, V2X can complement the incomplete history trajectories of single-vehicle perception, and we design a heterogeneous graph transformer to learn the fusion of history features from multiple agents and capture history interactions. Moreover, the goal of prediction is to support future planning. Thus, in the future domain, V2X can provide the prediction results of surrounding objects, and we further extend the graph transformer to capture the future interactions between the ego plan and the other vehicles' intentions and to obtain the final future scenario state under a certain planning action. We evaluate the Co-MTP framework on the real-world dataset V2X-Seq, and the results show that Co-MTP achieves state-of-the-art performance and that both history and future fusion greatly benefit prediction.
</p>
</div>
</div>
</div>
</div>
</section>
<!-- End paper abstract -->
<style>
  /* Center the whole module */
  .full-container {
    display: flex;
    justify-content: center; /* horizontal centering */
    align-items: center;     /* vertical centering */
  }
  /* Constrain the content column's width */
  .column.is-four-fifths {
    width: 80%; /* adjust the column width as needed */
  }
  /* Left-align section titles */
  .title.is-4 {
    text-align: left;
  }
  /* Center images */
  .content img {
    display: block;
    margin: 0 auto;
  }
  /* Justify body text */
  .content.has-text-justified {
    text-align: justify;
  }
</style>
<!-- Method -->
<section class="hero teaser">
<div class="container full-container">
<div class="column is-four-fifths">
<h2 class="title is-4">Co-MTP Framework</h2>
<div class="content has-text-justified">
<img src="static/images/overview.jpg" alt="Overview of the Co-MTP architecture"/>
<p>
<strong>The overall architecture of Co-MTP.</strong> In this framework, the infrastructure shares its history observations and prediction results with the ego CAV. We then construct a heterogeneous scene graph from the processed trajectory data and map information, categorizing nodes according to the types of objects and map elements. Next, we initialize the features of nodes and edges in the relative coordinate system of each object. The CTCA Fusion module updates the features of the nodes and edges selected by the STSA module over K Transformer layers. Finally, we take the nodes' hidden features from the last layer and feed them into the Multimodal Decoder to obtain the multimodal trajectory prediction results.
</p>
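<p>
For intuition, the minimal sketch below mirrors the fuse-then-decode flow described above: agent tokens, K Transformer layers, then a multimodal decoder. It is an illustrative PyTorch placeholder, not the released Co-MTP implementation: the heterogeneous edges, map elements, STSA selection, and relative-coordinate initialization are omitted, and all names and dimensions are assumptions.
</p>
<pre><code>
# Hypothetical sketch of the fuse-then-decode flow; not the authors' code.
import torch
import torch.nn as nn

class CoMTPSketch(nn.Module):
    def __init__(self, d_model=128, n_heads=8, n_layers=4, n_modes=6, horizon=50):
        super().__init__()
        # Embed per-agent state features into node tokens.
        self.node_embed = nn.Linear(4, d_model)  # e.g. (x, y, heading, speed)
        # K Transformer layers stand in for the CTCA fusion stage.
        layer = nn.TransformerEncoderLayer(d_model, n_heads, batch_first=True)
        self.fusion = nn.TransformerEncoder(layer, n_layers)
        # Multimodal decoder: one trajectory and one score per mode.
        self.traj_head = nn.Linear(d_model, n_modes * horizon * 2)
        self.score_head = nn.Linear(d_model, n_modes)
        self.n_modes, self.horizon = n_modes, horizon

    def forward(self, agent_states):
        # agent_states: (batch, n_agents, 4) fused ego + infrastructure states
        tokens = self.node_embed(agent_states)
        fused = self.fusion(tokens)  # interaction-aware node features
        trajs = self.traj_head(fused).view(
            *fused.shape[:2], self.n_modes, self.horizon, 2)
        scores = self.score_head(fused).softmax(-1)
        return trajs, scores  # multimodal trajectories and mode probabilities

model = CoMTPSketch()
trajs, scores = model(torch.randn(1, 8, 4))
print(trajs.shape, scores.shape)  # (1, 8, 6, 50, 2) and (1, 8, 6)
</code></pre>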
</div>
</div>
</div>
</section>
<!-- End Method -->
<!-- Experiment -->
<section class="hero teaser">
<div class="container full-container">
<div class="column is-four-fifths">
<h2 class="title is-4">Experiment</h2>
<div class="content has-text-justified">
<div class="has-text-centered">
<img src="static/images/main_result.png" alt="Performance comparison table on the V2X-Seq benchmark" class="new-image_1"/>
<p style="text-align: justify;">
<strong>Performance comparison on the V2X-Seq dataset.</strong> TNT, HiVT, and V2X-Graph are existing methods on the V2X-Seq dataset. Co-HTTP is the baseline model, a simplified version of our Co-MTP model. Co-MTP ranks first in minADE, minFDE, and MR on the dataset's benchmark.
</p>
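<p>
For reference, minADE, minFDE, and MR are standard displacement metrics; the sketch below gives their usual Argoverse-style definitions, where the mode with the smallest endpoint error is selected and a miss is counted when that endpoint error exceeds a threshold. The 2 m threshold and the mode-selection rule are assumptions of common practice, not taken from the paper.
</p>
<pre><code>
# Sketch of the standard metric definitions with NumPy; illustrative only.
import numpy as np

def min_ade_fde_mr(pred, gt, miss_threshold=2.0):
    """pred: (n_modes, horizon, 2) predicted positions; gt: (horizon, 2)."""
    dists = np.linalg.norm(pred - gt[None], axis=-1)  # (n_modes, horizon)
    ade = dists.mean(axis=1)   # average displacement error per mode
    fde = dists[:, -1]         # final displacement error per mode
    best = fde.argmin()        # mode with the closest endpoint
    miss = float(fde[best] > miss_threshold)  # averaged over a dataset, this is MR
    return ade[best], fde[best], miss

pred = np.random.randn(6, 50, 2)  # 6 modes, 5 s at 10 Hz (assumed)
gt = np.random.randn(50, 2)
print(min_ade_fde_mr(pred, gt))
</code></pre>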
</div>
<!-- spacer -->
<br>
<div class="has-text-centered">
<img src="static/images/ablation_study.png" alt="Ablation study results table" class="new-image_2"/>
<p style="text-align: justify;">
<strong>Results of the model ablation study.</strong> We examine the effectiveness of the multi-view data processing strategies and the decoder, assessing Co-MTP variants separately along the history and future time dimensions.
</p>
</div>
<!-- spacer -->
<br>
<div class="has-text-centered">
<!-- image container -->
<div style="display: flex; justify-content: center; gap: 20px;">
<img src="static/images/noise.png" alt="Robustness to positional noise" style="max-width: 47%;"/>
<img src="static/images/time_delay.png" alt="Robustness to communication delay" style="max-width: 47%;"/>
</div>
<p style="text-align: justify; margin-top: 0px;">
<strong>Robustness assessment.</strong> We assess robustness by introducing positional noise and communication delays, assuming a positional deviation of 0.2 meters and a time delay of 0.5 seconds. We run the experiments on the same Co-MTP model base alongside two variants: Co-MTP-no fusion, which excludes future fusion, and Co-HTTP-nofut, which simply stitches the trajectories together without future information.
</p>
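<p>
As a rough illustration of how such perturbations can be injected, the sketch below adds Gaussian positional noise (0.2 m standard deviation) and shifts the shared observations by 0.5 s to mimic communication latency. The 10 Hz sampling rate and the exact injection scheme are assumptions; the paper's protocol may differ.
</p>
<pre><code>
# Hypothetical perturbation of a shared trajectory; illustrative only.
import numpy as np

def perturb(traj, sigma=0.2, delay_s=0.5, dt=0.1):
    """traj: (timesteps, 2) cooperative trajectory sampled every dt seconds."""
    noisy = traj + np.random.normal(0.0, sigma, traj.shape)  # positional noise
    k = int(delay_s / dt)  # number of delayed frames
    if k == 0:
        return noisy
    # Mimic latency: shift observations back by k frames, repeating the
    # oldest frame so the sequence length stays fixed.
    return np.concatenate([np.repeat(noisy[:1], k, axis=0), noisy[:-k]])

traj = np.cumsum(np.random.randn(30, 2), axis=0)  # a random walk as dummy input
print(perturb(traj).shape)  # (30, 2)
</code></pre>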
</div>
<div class="gif-container">
<!-- container for each GIF and its caption -->
<div class="gif-item">
<img src="static/images/visualize_pra_1001.gif" alt="Qualitative prediction example (a)" class="gif-image"/>
<p class="gif-caption">(a)</p>
</div>
<div class="gif-item">
<img src="static/images/visualize_pra_10025.gif" alt="Qualitative prediction example (b)" class="gif-image"/>
<p class="gif-caption">(b)</p>
</div>
<div class="gif-item">
<img src="static/images/visualize_pra_10058.gif" alt="Qualitative prediction example (c)" class="gif-image"/>
<p class="gif-caption">(c)</p>
</div>
<div class="gif-item">
<img src="static/images/visualize_pra_10078.gif" alt="Qualitative prediction example (d)" class="gif-image"/>
<p class="gif-caption">(d)</p>
</div>
<p>
<strong>Qualitative examples of Co-MTP on the V2X-Seq dataset.</strong> The red boxes are the AV, while the orange ones are the predicted targets. The history ground truth is shown in blue, the predicted trajectories in green, and the future ground truth in brown.
</p>
</div>
</div>
</div>
</div>
</section>
<!-- End Experiment -->
<style>
  .gif-container {
    display: flex;
    flex-wrap: wrap;
    gap: 0px; /* spacing between GIFs */
  }
  .new-image_1 {
    max-width: 53%; /* maximum image width */
    margin: 20px 0; /* vertical margins */
  }
  .new-image_2 {
    max-width: 100%; /* maximum image width */
    margin: 20px 0;  /* vertical margins */
  }
  .gif-item {
    flex: 1 1 calc(50% - 0px); /* two GIFs per row, minus the gap */
    max-width: calc(50% - 0px); /* ensure only two per row */
    text-align: center; /* center the captions */
  }
  .gif-image {
    width: 100%;       /* fill the container width */
    height: auto;      /* preserve the aspect ratio */
    max-height: 600px; /* cap the height; adjust as needed */
  }
  .gif-caption {
    margin-top: 0px; /* spacing between caption and GIF */
    font-size: 14px; /* caption font size */
    color: #333;     /* caption color */
  }
</style>
<!--BibTex citation -->
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<pre><code>@misc{zhang2025comtpcooperativetrajectoryprediction,
title={Co-MTP: A Cooperative Trajectory Prediction Framework with Multi-Temporal Fusion for Autonomous Driving},
author={Xinyu Zhang and Zewei Zhou and Zhaoyi Wang and Yangjie Ji and Yanjun Huang and Hong Chen},
year={2025},
eprint={2502.16589},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2502.16589},
}</code></pre>
</div>
</section>
<!--End BibTex citation -->
<!-- <footer class="footer">
<div class="container">
<div class="columns is-centered">
<div class="column is-8">
<div class="content">
<p>
This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank">Academic Project Page Template</a> which was adopted from the <a href="https://nerfies.github.io" target="_blank">Nerfies</a> project page.
You are free to borrow the source code of this website, we just ask that you link back to this page in the footer. <br> This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative
Commons Attribution-ShareAlike 4.0 International License</a>.
</p>
</div>
</div>
</div>
</div>
</footer> -->
</body>
</html>