<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta name="keywords" content="hands,ICCV 2023,workshop,pose estimation">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="generator" content="jemdoc, see http://jemdoc.jaboc.net/" />
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link rel="stylesheet" href="main.css" type="text/css" />
<link rel="stylesheet" href="font-awesome/css/font-awesome.min.css">
<title>HANDS Workshop</title>
</head>
<body>
<div id="main-container">
<div id="header-container">
<div id="header">
<div id="header-icon-text-container">
<div id="header-text-container">
<nav class="style1">
<ul id="outer_list">
<li id="outer_li_year"><a id="current_year" href="#">2023<span id="arrow"></span></a>
<ul id="top_list">
<li id="style2"><a id="style3" href="workshop2024.html">2024</a></li>
<li id="style2"><a id="style3" href="workshop2023.html">2023</a></li>
<li id="style2"><a id="style3" href="workshop2022.html">2022</a></li>
<li id="style2"><a id="style3" href="https://sites.google.com/view/hands2019/home">2019</a>
<li id="style2"><a id="style3" href="https://sites.google.com/view/hands2018">2018</a>
<li id="style2"><a id="style3" href="">2017</a>
<li id="style2"><a id="style3" href="https://labicvl.github.io/hand/Hands2016/#home">2016</a>
<li id="style2"><a id="style3" href="">2015</a>
<!-- <li id="style2"><a id="style3" href="workshop2022">2022</a></li> -->
</ul>
</li>
<li id="outer_li"><a id="workshop_link" href="#">Workshop</a>
</li>
<li id="outer_li"><a id="challenge_link" href="#">Challenge</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div id="layout-content">
<div id="text-img-container">
<div id="img-container">
<a href="https://hands-workshop.org/"><img width="100%" alt="HANDS" src="logos/hands.png"></a>
</div>
<div id="text-container"></div>
</div>
<p>
<div id="beamer">
<beam>
Observing and Understanding <b>Hands</b> in Action
</beam><br>
<beams>
in conjunction with ICCV 2023<br>
</beams>
</div>
<br>
<div id="menu-container">
<div id="menu-item"><a id="style6" href="#overview">Overview</a></div>
<div id="menu-item"><a id="style6" href="#schedule">Schedule</a></div>
<div id="menu-item"><a id="style6" href="#papers">Papers</a></div>
<div id="menu-item"><a id="style6" href="#speakers">Speakers</a></div>
<div id="menu-item"><a id="style6" href="#organizers">Organizers</a></div>
<div id="menu-item"><a id="style6" href="#sponsors">Sponsors</a></div>
<div id="menu-item"><a id="style6" href="#contact">Contact</a></div>
</div>
<br>
<br>
<p style="align-items: center;text-align: center; font-size:20px;">
<b>This page is a rebuild of the original page, which can be found <a target="_blank"
href="https://sites.google.com/view/hands2023/home">here</a></b><br>
</p>
<h1 id="overview">Overview </h1>
<font size="5">
Welcome to our ICCV 2023 Workshop!
</font>
<br>
<br>
<p>The Workshop on Observing and Understanding Hands in Action (HANDS) will gather vision researchers working on
perceiving hands performing actions, including 2D & 3D hand detection, segmentation, pose/shape
estimation,
tracking, etc. The seventh edition of this workshop (HANDS@ICCV2023) will emphasize <b>hand pose estimation
from
the egocentric view</b> and <b>hands performing fine-grained actions and interactions with tools and
objects</b>. </p>
<p>The development of RGB-D sensors and the miniaturization of cameras (wearable cameras, smartphones, ubiquitous
computing) have opened the door to a whole new range of technologies and applications that require detecting
hands and recognizing hand poses in a variety of scenarios, including AR/VR, assistive systems, robot grasping,
and health care. However, hand pose estimation from an egocentric camera and/or in the presence of heavy
occlusion remains challenging. </p>
<p>
Compared to static camera settings, recognizing hands in egocentric images is a more difficult problem due to
viewpoint bias, camera distortion (e.g., fisheye), and motion blur from head movement. Additionally,
addressing occlusion during hand-object or hand-hand interactions is an important open challenge that
still attracts significant attention for real-world applications. We will also cover related applications, including
gesture recognition, hand-object manipulation analysis, hand activity understanding, and interactive
interfaces.
</p>
<h2>Topics</h2>
We will cover all hand-related topics. The relevant topics include, but are not limited to:
<ul id="topicstyle1">
<li id="topicstyle2">2D/3D hand pose estimation</li>
<li id="topicstyle2">Hand shape estimation </li>
<li id="topicstyle2">Hand-object/hand interaction</li>
<li id="topicstyle2">Hand detection/segmentation</li>
<li id="topicstyle2">Gesture recognition/interfaces</li>
<li id="topicstyle2">3D hand tracking and motion capture</li>
<li id="topicstyle2">Hand modeling and rendering</li>
<li id="topicstyle2">Egocentric vision</li>
<li id="topicstyle2">Hand activity understanding</li>
<li id="topicstyle2">Robot grasping and object manipulation</li>
<li id="topicstyle2">Hand image capture and camera systems</li>
<li id="topicstyle2">Efficient hand annotation methods and devices </li>
<li id="topicstyle2">Algorithm, theory, and network architecture</li>
<li id="topicstyle2">Efficient learning methods with limited labels</li>
<li id="topicstyle2">Generalization and adaptation to unseen users and environments </li>
<li id="topicstyle2">Applications in AR/VR, Robotics, and Haptics</li>
</ul>
<h1 id="schedule">Schedule(Paris Time)</h1>
<p style="align-items: center;text-align: center;"><b>Monday afternoon (13:30-17:30), October. 2. 2023</b></br>
<b>W5, Paris Convention Center, France</b></br>
</p>
<table class="dataintable">
<tbody>
<tr>
<td><b>13:30 - 13:40</b></td>
<td>Opening Remarks</td>
</tr>
<tr>
<td><b>13:40 - 14:10</b></td>
<td> Invited Talk: He Wang</td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> Learning universal dexterous grasping policy from 3D point cloud observations<br>
<b>Abstract:</b> Dexterous hand grasping is an essential research problem for the vision, graphics, and
robotics communities. In this talk, I will first cover our recent work, DexGraspNet, on synthesizing
million-scale diverse dexterous hand grasping data, which was a finalist for the ICRA 2023 Outstanding
Manipulation Paper Award. Building on this data, our CVPR 2023 work, UniDexGrasp, learns a generalizable
point-cloud-based dexterous grasping policy that can generalize across thousands of objects. We further
extend this work to UniDexGrasp++, accepted as an ICCV oral, which proposes a general framework that
raises the success rate to more than 80%.
</td>
</tr>
<tr>
<td colspan="2">
<div class="youtube">
<center>
<iframe width="100%" height="auto" class="elementor-video-iframe"
style="display: block;aspect-ratio:16/9;" src="https://www.youtube.com/embed/uQi065n-nf8"
frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
allowfullscreen>
</iframe>
</center>
</div>
</td>
</tr>
<tr>
<td><b>14:10 - 14:40</b></td>
<td> Invited Talk: Gül Varol </td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> Automatic annotation of open-vocabulary sign language videos<br>
<b>Abstract:</b> Research on sign language technologies has suffered from the lack of data to train
machine learning models. This talk will describe our recent efforts on scalable approaches to
automatically annotate continuous sign language videos with the goal of building a large-scale
dataset. In particular, we leverage weakly-aligned subtitles from sign interpreted broadcast footage.
These subtitles provide us with candidate keywords to search for and localise individual signs. To this end, we
develop several sign spotting techniques: (i) using mouthing cues at the lip region, (ii) looking up
videos from sign language dictionaries, and (iii) exploring the sign localisation that emerges from
the attention mechanism of a sequence prediction model. We further tackle the subtitle alignment
problem to improve their synchronization with signing. With these methods, we build the BBC-Oxford
British Sign Language Dataset (BOBSL), continuous signing videos of more than a thousand hours,
containing millions of sign instance annotations from a large vocabulary. These annotations allow us
to train large-vocabulary continuous sign language recognition (transcription of each sign), as well
as subtitle-video retrieval, which we hope will open up new possibilities towards addressing the
currently unsolved problem of sign language translation in the wild.
</td>
</tr>
<tr>
<td colspan="2">
<div class="youtube">
<center>
<iframe width="100%" height="auto" class="elementor-video-iframe"
style="display: block;aspect-ratio:16/9;" src="https://www.youtube.com/embed/SNL7WdDQOAA"
frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
allowfullscreen>
</iframe>
</center>
</div>
</td>
</tr>
<tr>
<td> <b>14:40 - 15:10</b></td>
<td> Invited Talk: Gyeongsik Moon</td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> Towards 3D Interacting Hands Recovery in the Wild<br>
<b>Abstract:</b> Understanding interactions between two hands is critical for analyzing various
hand-driven social signals and the manipulation of objects using both hands. The recently introduced
large-scale InterHand2.6M dataset has enabled learning-based approaches to recover 3D interacting hands
from a single image. Despite the significant improvements, most methods have focused on recovering 3D
interacting hands mainly from images of InterHand2.6M, which have very different image appearances
from in-the-wild images, as the dataset was captured in a constrained studio. For 3D interacting hands
recovery in the wild, this talk will introduce two recent works, one taking an algorithmic approach and
the other a dataset approach, accepted at CVPR 2023 and NeurIPS 2023, respectively. For the algorithmic
approach, we introduce InterWild, a 3D interacting hands recovery
system that brings inputs from in-the-lab and in-the-wild datasets to a shared domain to reduce the
domain gap between them. For the dataset approach, we introduce our new dataset, Re:InterHand, which
consists of accurately tracked 3D geometry of interacting hands and images rendered with a pre-trained
state-of-the-art relighting network. As the images are rendered with lighting from high-resolution
environment maps, our Re:InterHand dataset provides images with highly diverse and realistic
appearances. As a result, 3D interacting hands recovery systems trained on Re:InterHand achieve better
generalizability to in-the-wild images than systems trained only on in-the-lab datasets.
</td>
</tr>
<tr>
<td colspan="2">
<div class="youtube">
<center>
<iframe width="100%" height="auto" class="elementor-video-iframe"
style="display: block;aspect-ratio:16/9;" src="https://www.youtube.com/embed/5-0zENBKU5o"
frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
allowfullscreen>
</iframe>
</center>
</div>
</td>
</tr>
<tr>
<td><b>15:10 - 16:10</b></td>
<td><a href="#papers"> Poster List</a> Coffee break time & Poster </td>
</tr>
<tr>
<td> <b>16:10 - 16:40</b></td>
<td> Invited Talk: David Fouhey</td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> From Hands In Action to Possibilities of Interaction<br>
<b>Abstract:</b> In this talk, I'll show some recent work from our research group spanning the gamut
from understanding hands in action to imagining possibilities for interaction. In the first part, I'll
focus on a new system and dataset for obtaining a deeper basic understanding of hands and in-contact
objects, including tool use. The second part looks towards the future and will show a new
system that aims to provide information at potential interaction sites.
</td>
</tr>
<tr>
<td colspan="2">
<div class="youtube">
<center>
<iframe width="100%" height="auto" class="elementor-video-iframe"
style="display: block;aspect-ratio:16/9;" src="https://www.youtube.com/embed/pquEbI2ph5g"
frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
allowfullscreen>
</iframe>
</center>
</div>
</td>
</tr>
<tr>
<td><b>16:40 - 17:10</b></td>
<td> Invited Talk: Lixin Yang </td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> Paving the way for further understanding of human interactions with objects in task
completion: the OakInk and OakInk2 datasets<br>
<b>Abstract:</b> Researching how humans accomplish daily tasks through object manipulation presents a
long-standing challenge. Recognizing object affordances and learning human interactions with these
affordances offers a potential solution. In 2022, to facilitate data-driven learning methodologies, we
proposed OakInk, a substantial knowledge repository consisting of two wings: 'Oak' for object
affordances and 'Ink' for intention-oriented, affordance-aware interactions. This talk will introduce
our work in 2023: we expanded the OakInk methodology, giving rise to OakInk2, a comprehensive dataset
encompassing embodied hand-object interactions during complex, long-horizon task completion. OakInk2
incorporates demonstrations of 'Primitive Tasks', defined as the minimal interactions necessary for
fulfilling object affordance attributes, and 'Combined Tasks', which merge Primitive Tasks with
specific dependencies. Both OakInk and OakInk2 capture multi-view image streams, provide detailed pose
annotations for embodied hands and diverse interacting objects, and scrutinize dependencies between
Primitive Task completion and underlying object affordance fulfillment. With all this knowledge
incorporated, we show that OakInk and OakInk2 will provide strong support for a variety of tasks,
including hand-object reconstruction, motion synthesis, and planning, imitation, and manipulation
within the scope of embodied AI.
</td>
</tr>
<tr>
<td colspan="2">
<div class="youtube">
<center>
<iframe width="100%" height="auto" class="elementor-video-iframe"
style="display: block;aspect-ratio:16/9;" src="https://www.youtube.com/embed/R1wbaBYOBmk"
frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
allowfullscreen>
</iframe>
</center>
</div>
</td>
</tr>
<tr>
<td><b>17:10 - 17:17</b></td>
<td> Report: Aditya Prakash </td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> Reducing Scale Ambiguity due to Data Augmentation<br>
</td>
</tr>
<tr>
<td><b>17:17 - 17:24</b></td>
<td> Report: Karim Abou Zeid </td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> Joint Transformer<br>
</td>
</tr>
<tr>
<td><b>17:24 - 17:31</b></td>
<td> Report: Zhishan Zhou </td>
</tr>
<tr>
<td></td>
<td> <b>Title:</b> A Concise Pipeline for Egocentric Hand Pose Reconstruction<br>
</td>
</tr>
<tr>
<td> <b>17:31 - 17:31</b></td>
<td> Closing Remarks</td>
</tr>
</tbody>
</table>
<h1 id="papers">Accepted Papers & Extended Abstracts</h1>
<!-- list papers in the form of:
OakInk2 : A Dataset for Long-Horizon Hand-Object Interaction and Complex Manipulation Task Completion.
Xinyu Zhan*, Lixin Yang*, Kangrui Mao, Hanlin Xu, Yifei Zhao, Zenan Lin, Kailin Li, Cewu Lu.
[pdf] -->
<p>We are delighted to announce that the following accepted papers and extended abstracts will appear in the
workshop! Authors of extended abstracts and invited posters should prepare posters for presentation during the
workshop.</p> <br>
<p> <b>Poster size: posters should be portrait (vertical), with a maximum size of 90x180 cm.</b></p>
<!-- <ul> -->
<h2>Accepted Extended Abstracts</h2>
<ul>
<li> OakInk2: A Dataset for Long-Horizon Hand-Object Interaction and Complex Manipulation Task
Completion. <br>
<i>Xinyu Zhan*, Lixin Yang*, Kangrui Mao, Hanlin Xu, Yifei Zhao, Zenan Lin, Kailin Li, Cewu Lu. </i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1fM7HdNOnto7Gb4oKEEnYsLU7bxvQxbag/view?usp=share_link">[pdf]</a>
</li>
</ul>
<ul>
<li> A Novel Framework for Generating In-the-Wild 3D Hand Datasets. <br>
<i>Junho Park*, Kyeongbo Kong*, Suk-ju Kang.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1r3pBVFGSEufzGPhz0l1TmKV_s08FJmP_/view?usp=share_link">[pdf]</a>
</li>
</ul>
<ul>
<li> New keypoint-based approach for recognising British Sign Language (BSL) from sequences. <br>
<i>Oishi Deb*, Prajwal KR, Andrew Zisserman.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1YX3tCMKvq5iflqYqmBpCdLxvYTR3ZuU_/view?usp=share_link">[pdf]</a>
</li>
</ul>
<ul>
<li> Text-to-Hand-Image Generation Using Pose- and Mesh-Guided Diffusion. <br>
<i>Supreeth Narasimhaswamy, Uttaran Bhattacharya, Xiang Chen, Ishita Dasgupta, and Saayan Mitra.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1T0ACyywj4PH9Te9pKVtiwC-ODSQJHqGM/view?usp=share_link">[pdf]</a>
</li>
</ul>
<ul>
<li> Hand Segmentation with Fine-tuned Deep CNN in Egocentric Videos. <br>
<i>Eyitomilayo Yemisi Babatope, Alejandro A. Ramírez-Acosta, Mireya S. García-Vázquez.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1nY0VUB6aFbQiXIhHHnWMazP6xB2-oAhL/view?usp=share_link">[pdf]</a>
</li>
</ul>
<h2>Technical Reports</h2>
<ul>
<li> A Concise Pipeline for Egocentric Hand Pose Reconstruction. <br>
<i>Zhishan Zhou*, Zhi Lv*, Shihao Zhou, Minqiang Zou, Tong Wu, Mochen Yu, Yao Tang, Jiajun Liang.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1C3oizCPxnQhZ_De_P9vZfszoIGUWelK3/view?usp=share_link">[pdf]</a>
</li>
</ul>
<ul>
<li> Multi-View Fusion Strategy for Egocentric 3D Hand Pose Estimation. <br>
<i>Zhong Gao, Xuanyang Zhang.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1m_oJY0yvJZSLebDBopSL0Hsg0PhN-J_A/view?usp=share_link">[pdf]</a>
</li>
</ul>
<ul>
<li> Egocentric 3D Hand Pose Estimation. <br>
<i>Xue Zhang, Jingyi Wang, Fei Li, Rujie Liu.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/1Y9YnbX2jPbXSpmRj5OIOy2txnBTqgAH1/view?usp=share_link">[pdf]</a>
</li>
</ul>
<ul>
<li> Reducing Scale Ambiguity due to Data Augmentation in 3D Hand-Object Pose Estimation. <br>
<i>Aditya Prakash, Saurabh Gupta.</i> <br>
<!-- [pdf] -->
<a href="https://drive.google.com/file/d/12rlQmN1wfALdMwaSLzvK4sATUk_HYyUd/view?usp=share_link">[pdf]</a>
</li>
</ul>
<h2>Invited Posters</h2>
<ul>
<li> Spectral Graph-Based Transformer for Egocentric Two-Hand Reconstruction using Multi-View Color
Images. <br>
<i>Tze Ho Elden Tse, Franziska Mueller, Zhengyang Shen, Danhang Tang, Thabo Beeler, Mingsong Dou, Yinda
Zhang, Sasa Petrovic, Hyung Jin Chang, Jonathan Taylor, Bardia Doosti.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Tse_Spectral_Graphormer_Spectral_Graph-Based_Transformer_for_Egocentric_Two-Hand_Reconstruction_using_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Tse_Spectral_Graphormer_Spectral_ICCV_2023_supplemental.pdf">[supp]</a>
</li>
</ul>
<ul>
<li> Deformer: Dynamic Fusion Transformer for Robust Hand Pose Estimation. <br>
<i>Qichen Fu, Xingyu Liu, Ran Xu, Juan Carlos Niebles, Kris M. Kitani.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Fu_Deformer_Dynamic_Fusion_Transformer_for_Robust_Hand_Pose_Estimation_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Fu_Deformer_Dynamic_Fusion_ICCV_2023_supplemental.pdf">[supp]</a>
</li>
</ul>
<ul>
<li> HandR2N2: Iterative 3D Hand Pose Estimation Using a Residual Recurrent Neural Network. <br>
<i>Wencan Cheng, Jong Hwan Ko.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Cheng_HandR2N2_Iterative_3D_Hand_Pose_Estimation_Using_a_Residual_Recurrent_ICCV_2023_paper.pdf">[pdf]</a>
</li>
</ul>
<ul>
<li> HoloAssist: an Egocentric Human Interaction Dataset for Interactive AI Assistants in the Real
World. <br>
<i>Xin Wang, Taein Kwon, Mahdi Rad, Bowen Pan, Ishani Chakraborty, Sean Andrist, Dan Bohus, Ashley Feniello,
Bugra Tekin, Felipe Vieira Frujeri, Neel Joshi, Marc Pollefeys.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Wang_HoloAssist_an_Egocentric_Human_Interaction_Dataset_for_Interactive_AI_Assistants_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Wang_HoloAssist_an_Egocentric_ICCV_2023_supplemental.pdf">[supp]</a>
</li>
</ul>
<ul>
<li> MHEntropy: Entropy Meets Multiple Hypotheses for Pose and Shape Recovery. <br>
<i>Rongyu Chen, Linlin Yang, Angela Yao.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Chen_MHEntropy_Entropy_Meets_Multiple_Hypotheses_for_Pose_and_Shape_Recovery_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Chen_MHEntropy_Entropy_Meets_ICCV_2023_supplemental.pdf">[supp]</a>
</li>
</ul>
<ul>
<li> UniDexGrasp++: Improving Dexterous Grasping Policy Learning via Geometry-Aware Curriculum and
Iterative Generalist-Specialist Learning. <br>
<i>Weikang Wan, Haoran Geng, Yun Liu, Zikang Shan, Yaodong Yang, Li Yi, He Wang.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Wan_UniDexGrasp_Improving_Dexterous_Grasping_Policy_Learning_via_Geometry-Aware_Curriculum_and_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Wan_UniDexGrasp_Improving_Dexterous_ICCV_2023_supplemental.zip">[supp]</a>
</li>
</ul>
<ul>
<li> Decoupled Iterative Refinement Framework for Interacting Hands Reconstruction from a Single
RGB Image. <br>
<i>Pengfei Ren, Chao Wen, Xiaozheng Zheng, Zhou Xue, Haifeng Sun, Qi Qi, Jingyu Wang, Jianxin Liao.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Ren_Decoupled_Iterative_Refinement_Framework_for_Interacting_Hands_Reconstruction_from_a_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Ren_Decoupled_Iterative_Refinement_ICCV_2023_supplemental.zip">[supp]</a>
</li>
</ul>
<ul>
<li> HaMuCo: Hand Pose Estimation via Multiview Collaborative Self-Supervised Learning. <br>
<i>Xiaozheng Zheng, Chao Wen, Zhou Xue, Pengfei Ren, Jingyu Wang.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Zheng_HaMuCo_Hand_Pose_Estimation_via_Multiview_Collaborative_Self-Supervised_Learning_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Zheng_HaMuCo_Hand_Pose_ICCV_2023_supplemental.zip">[supp]</a>
</li>
</ul>
<ul>
<li> Realistic Full-Body Tracking from Sparse Observations via Joint-Level Modeling. <br>
<i>Xiaozheng Zheng, Zhuo Su, Chao Wen, Zhou Xue, Xiaojie Jin.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Zheng_Realistic_Full-Body_Tracking_from_Sparse_Observations_via_Joint-Level_Modeling_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Zheng_Realistic_Full-Body_Tracking_ICCV_2023_supplemental.zip">[supp]</a>
</li>
</ul>
<ul>
<li> CHORD: Category-level Hand-held Object Reconstruction via Shape Deformation. <br>
<i>Kailin Li, Lixin Yang, Haoyu Zhen, Zenan Lin, Xinyu Zhan, Licheng Zhong, Jian Xu, Kejian Wu, Cewu Lu.</i>
<br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Li_CHORD_Category-level_Hand-held_Object_Reconstruction_via_Shape_Deformation_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Li_CHORD_Category-level_Hand-held_ICCV_2023_supplemental.pdf">[supp]</a>
</li>
</ul>
<ul>
<li> OCHID-Fi: Occlusion-Robust Hand Pose Estimation in 3D via RF-Vision.<br>
<i>Shujie Zhang, Tianyue Zheng, Zhe Chen, Jingzhi Hu, Abdelwahed Khamis, Jiajun Liu, Jun Luo.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Zhang_OCHID-Fi_Occlusion-Robust_Hand_Pose_Estimation_in_3D_via_RF-Vision_ICCV_2023_paper.pdf">[pdf]</a>
</li>
</ul>
<ul>
<li> Multimodal Distillation for Egocentric Action Recognition.<br>
<i>Gorjan Radevski, Dusan Grujicic, Matthew Blaschko, Marie-Francine Moens, Tinne Tuytelaars.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Radevski_Multimodal_Distillation_for_Egocentric_Action_Recognition_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Radevski_Multimodal_Distillation_for_ICCV_2023_supplemental.pdf">[supp]</a>
</li>
</ul>
<ul>
<li> FineDance: A Fine-grained Choreography Dataset for 3D Full Body Dance Generation.<br>
<i>Ronghui Li, Junfan Zhao, Yachao Zhang, Mingyang Su, Zeping Ren, Han Zhang, Yansong Tang, Xiu Li.</i> <br>
<!-- [pdf] -->
<a
href="https://openaccess.thecvf.com/content/ICCV2023/papers/Li_FineDance_A_Fine-grained_Choreography_Dataset_for_3D_Full_Body_Dance_ICCV_2023_paper.pdf">[pdf]</a>
<a
href="https://openaccess.thecvf.com/content/ICCV2023/supplemental/Li_FineDance_A_Fine-grained_ICCV_2023_supplemental.zip">[supp]</a>
</li>
</ul>
<h1 id="speakers">Invited Speakers</h1>
<div id="member-container" style="display:grid">
<div id="member" style="display:flex;width:100%;">
<img src="./profiles/2023/davidFouhey.jpg" style="align-self:center;width:150px;height: 150px;">
<div style="text-align:left;margin-left:10px;margin-top:20px">
<b><a
href="https://www.google.com/url?q=https%3A%2F%2Fcs.nyu.edu%2F~fouhey%2F&sa=D&sntz=1&usg=AOvVaw1ZEYHu-LzcyRYsiVr_yoCu">David
Fouhey</a></b>
is an Assistant Professor at NYU, jointly appointed between Computer Science in the Courant Institute of
Mathematical Sciences and Electrical and Computer Engineering in the Tandon School of Engineering. His
research interests include understanding 3D from pictorial cues, understanding the interactive world, and
measurement systems for basic sciences, especially solar physics.
</div>
</div>
<div id="member" style="display:flex;width:100%;">
<img src="./profiles/2023/gyeongsikMoon.jpg" style="align-self:center;width:150px;height: 150px;">
<div style="text-align:left;margin-left:10px;margin-top:20px">
<b><a
href="https://www.google.com/url?q=https%3A%2F%2Fmks0601.github.io%2F&sa=D&sntz=1&usg=AOvVaw3jJUGQIrSyDU0d6-gFr7RQ">Gyeongsik
Moon</a></b>
is a Postdoctoral Research Scientist at Reality Labs Research, Meta. His research is focused on
designing interactive AI systems that act like humans, look like humans, and perceive humans’ status
(motion, feeling, intention, and others) through computer vision, computer graphics, and machine
learning.
</div>
</div>
<div id="member" style="display:flex;width:100%;">
<img src="./profiles/2023/G%C3%BClVarol.jpg" style="align-self:center;width:150px;height: 150px;">
<div style="text-align:left;margin-left:10px;margin-top:20px">
<b><a
href="https://www.google.com/url?q=https%3A%2F%2Fimagine.enpc.fr%2F~varolg%2F&sa=D&sntz=1&usg=AOvVaw1J_f2NUrzLXoeDChSnjSBt">Gül
Varol</a></b>
is a Permanent Researcher in the IMAGINE team at École des Ponts ParisTech. Previously, she was a
postdoctoral researcher in the Visual Geometry Group (VGG) at the University of Oxford. She obtained her
PhD from the WILLOW team of Inria Paris and École Normale Supérieure, receiving PhD awards from ELLIS
and AFRIF. Her research is focused on computer vision, specifically video representation learning, human
motion analysis, and sign language.
</div>
</div>
<div id="member" style="display:flex;width:100%;">
<img src="./profiles/2023/heWang.jpg" style="align-self:center;width:150px;height: 150px;">
<div style="text-align:left;margin-left:10px;margin-top:20px">
<b><a
href="https://www.google.com/url?q=https%3A%2F%2Fhughw19.github.io&sa=D&sntz=1&usg=AOvVaw29aiBNPiw_IvGn33FEYohs">He
Wang</a></b>
is an Assistant Professor in the Center on Frontiers of Computing Studies (CFCS) at Peking University,
where he leads the Embodied Perception and InteraCtion (EPIC) Lab. His research interests span 3D vision,
robotics, and machine learning. His research objective is to endow embodied agents working in complex
real-world scenes with generalizable 3D vision and interaction policies.
</div>
</div>
<div id="member" style="display:flex;width:100%;">
<img src="./profiles/2023/liXinYang.jpg" style="align-self:center;width:150px;height: 150px;">
<div style="text-align:left;margin-left:10px;margin-top:20px">
<b><a
href="https://www.google.com/url?q=https%3A%2F%2Flixiny.github.io&sa=D&sntz=1&usg=AOvVaw0lji8ZY4tg5oKMi7DIvDnZ">Lixin
Yang</a></b>
is an Assistant Professor in the Department of Computer Science at Shanghai Jiao Tong University (SJTU).
He received his PhD degree from SJTU under the supervision of Prof. Cewu Lu. His research interests
include Computer Vision, Robotic Vision, 3D Vision, and Graphics. Currently, he is focusing on modeling
and imitating the interaction of hands manipulating objects, including 3D hand pose and shape from X,
hand-object reconstruction, animation, and synthesis.
</div>
</div>
</div>
<h1 id="organizers">Organizers</h1>
<li id="topicstyle2">Prof. Hyung Jin Chang (University of Birmingham)</li>
<li id="topicstyle2">Zicong Fan (ETHZ)</li>
<li id="topicstyle2">Prof. Otmar Hilliges (ETHZ)</li>
<li id="topicstyle2">Takehiko Ohkawa (University of Tokyo)</li>
<li id="topicstyle2">Prof. Yoichi Sato (University of Tokyo)</li>
<li id="topicstyle2">Dr. Linlin Yang (Communication University of China)</li>
<li id="topicstyle2">Prof. Angela Yao (NUS)</li>
<h1 id="sponsors">Sponsors</h1>
<div class="sponsors-container">
<img class="sponsor-img" src="./profiles/2023/sponsor1.png">
<img class="sponsor-img" src="./profiles/2023/sponsor2.png">
</div>
<h1 id="contact">Contact</h1>
<p>[email protected]</p>
<div id="footer">
<p style="align-items: center;text-align: center;">
<a href="https://youtube.com/@handsworkshop" target="_Blank">
<img id="page1" alt="" src="./profiles/youtube.jpg">
</a>
<a href="https://github.com/handsworkshop" target="_Blank">
<img id="page" alt="" src="./profiles/github.png">
</a>
</p>
</div>
<script>
var isYearUpdated = false; // flag: the year has not been updated yet
document.getElementById('outer_li_year').addEventListener('click', function (event) {
event.preventDefault(); // prevent the default link behavior
// get the year from the first <li> element in the menu
var year = document.querySelector('#outer_list > li:first-child > a').textContent.trim();
if (year > '2020') {
// build the new href
var newHref = 'workshop' + year + '.html';
// navigate to the new page
window.location.href = newHref;
}
});
document.getElementById('workshop_link').addEventListener('click', function (event) {
event.preventDefault(); // prevent the default link behavior
if (!isYearUpdated) {
var year = document.querySelector('#outer_list > li:first-child > a').textContent.trim();
var newHref = 'workshop' + year + '.html';
window.location.href = newHref;
}
});
document.getElementById('challenge_link').addEventListener('click', function (event) {
event.preventDefault(); // prevent the default link behavior
if (!isYearUpdated) {
var year = document.querySelector('#outer_list > li:first-child > a').textContent.trim();
var newHref = 'challenge' + year + '.html';
window.location.href = newHref;
}
});
// get all <a> tags with id="style3" (the year links in the dropdown)
var yearLinks = document.querySelectorAll('#style3');
yearLinks.forEach(function (link) {
link.addEventListener('click', function (event) {
// get the clicked year
var selectedYear = this.textContent.trim();
if (selectedYear < '2020') {
isYearUpdated = true;
document.getElementById('current_year').textContent = selectedYear;
// mark the year as updated
window.location.href = link.href; // make sure to navigate via the link's href
} else {
event.preventDefault(); // prevent the default link behavior
document.getElementById('current_year').textContent = selectedYear;
// mark the year as updated
isYearUpdated = true;
// close the dropdown menu (if needed)
// document.getElementById('top_list').style.display = 'none';
// optional: add other logic here
}
});
});
var workshopLi = document.querySelector('#workshop_link');
workshopLi.classList.add('highlight');
</script>
</body>
</html>