<!DOCTYPE html>
<html lang="en">
<head>
<meta content="text/html; charset=utf-8" http-equiv="content-type"/>
<title>Retrieval-Augmented Generation for Large Language Models: A Survey</title>
<!--Generated on Wed Mar 27 09:16:19 2024 by LaTeXML (version 0.8.7) http://dlmf.nist.gov/LaTeXML/.-->
<meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport"/>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet" type="text/css"/>
<link href="/static/browse/0.3.4/css/ar5iv_0.7.4.min.css" rel="stylesheet" type="text/css"/>
<link href="/static/browse/0.3.4/css/latexml_styles.css" rel="stylesheet" type="text/css"/>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.3.3/html2canvas.min.js"></script>
<script src="/static/browse/0.3.4/js/addons.js"></script>
<script src="/static/browse/0.3.4/js/feedbackOverlay.js"></script>
<meta content="
Large language model, retrieval-augmented generation, natural language processing, information retrieval
" lang="en" name="keywords"/>
<base href="/html/2312.10997v5/"/></head>
<body>
<nav class="ltx_page_navbar">
<nav class="ltx_TOC">
<ol class="ltx_toclist">
<li class="ltx_tocentry ltx_tocentry_section"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S1" title="I Introduction ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">I </span><span class="ltx_text ltx_font_smallcaps">Introduction</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2" title="II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">II </span><span class="ltx_text ltx_font_smallcaps">Overview of RAG </span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS1" title="II-A Naive RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-A</span> </span><span class="ltx_text ltx_font_italic">Naive RAG</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS2" title="II-B Advanced RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-B</span> </span><span class="ltx_text ltx_font_italic">Advanced RAG</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS3" title="II-C Modular RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-C</span> </span><span class="ltx_text ltx_font_italic">Modular RAG</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS3.SSS1" title="II-C1 New Modules ‣ II-C Modular RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-C</span>1 </span>New Modules</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS3.SSS2" title="II-C2 New Patterns ‣ II-C Modular RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-C</span>2 </span>New Patterns</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS4" title="II-D RAG vs Fine-tuning ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-D</span> </span><span class="ltx_text ltx_font_italic">RAG vs Fine-tuning</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3" title="III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">III </span><span class="ltx_text ltx_font_smallcaps">Retrieval</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS1" title="III-A Retrieval Source ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-A</span> </span><span class="ltx_text ltx_font_italic">Retrieval Source</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS1.SSS1" title="III-A1 Data Structure ‣ III-A Retrieval Source ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-A</span>1 </span>Data Structure</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS1.SSS2" title="III-A2 Retrieval Granularity ‣ III-A Retrieval Source ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-A</span>2 </span>Retrieval Granularity</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2" title="III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span> </span><span class="ltx_text ltx_font_italic">Indexing Optimization</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2.SSS1" title="III-B1 Chunking Strategy ‣ III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span>1 </span>Chunking Strategy</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2.SSS2" title="III-B2 Metadata Attachments ‣ III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span>2 </span>Metadata Attachments</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2.SSS3" title="III-B3 Structural Index ‣ III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span>3 </span>Structural Index</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3" title="III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span> </span><span class="ltx_text ltx_font_italic">Query Optimization</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3.SSS1" title="III-C1 Query Expansion ‣ III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span>1 </span>Query Expansion</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3.SSS2" title="III-C2 Query Transformation ‣ III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span>2 </span>Query Transformation</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3.SSS3" title="III-C3 Query Routing ‣ III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span>3 </span>Query Routing</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS4" title="III-D Embedding ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-D</span> </span><span class="ltx_text ltx_font_italic">Embedding</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS4.SSS1" title="III-D1 Mix/hybrid Retrieval ‣ III-D Embedding ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-D</span>1 </span>Mix/hybrid Retrieval</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS4.SSS2" title="III-D2 Fine-tuning Embedding Model ‣ III-D Embedding ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-D</span>2 </span>Fine-tuning Embedding Model</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS5" title="III-E Adapter ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-E</span> </span><span class="ltx_text ltx_font_italic">Adapter</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4" title="IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">IV </span><span class="ltx_text ltx_font_smallcaps">Generation</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS1" title="IV-A Context Curation ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-A</span> </span><span class="ltx_text ltx_font_italic">Context Curation</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS1.SSS1" title="IV-A1 Reranking ‣ IV-A Context Curation ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-A</span>1 </span>Reranking</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS1.SSS2" title="IV-A2 Context Selection/Compression ‣ IV-A Context Curation ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-A</span>2 </span>Context Selection/Compression</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS2" title="IV-B LLM Fine-tuning ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-B</span> </span><span class="ltx_text ltx_font_italic">LLM Fine-tuning</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5" title="V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">V </span><span class="ltx_text ltx_font_smallcaps">Augmentation process in RAG</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5.SS1" title="V-A Iterative Retrieval ‣ V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">V-A</span> </span><span class="ltx_text ltx_font_italic">Iterative Retrieval</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5.SS2" title="V-B Recursive Retrieval ‣ V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">V-B</span> </span><span class="ltx_text ltx_font_italic">Recursive Retrieval</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5.SS3" title="V-C Adaptive Retrieval ‣ V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">V-C</span> </span><span class="ltx_text ltx_font_italic">Adaptive Retrieval</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6" title="VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">VI </span><span class="ltx_text ltx_font_smallcaps">Task and Evaluation</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS1" title="VI-A Downstream Task ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-A</span> </span><span class="ltx_text ltx_font_italic">Downstream Task</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS2" title="VI-B Evaluation Target ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-B</span> </span><span class="ltx_text ltx_font_italic">Evaluation Target</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS3" title="VI-C Evaluation Aspects ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-C</span> </span><span class="ltx_text ltx_font_italic">Evaluation Aspects</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS3.SSS1" title="VI-C1 Quality Scores ‣ VI-C Evaluation Aspects ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-C</span>1 </span>Quality Scores</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS3.SSS2" title="VI-C2 Required Abilities ‣ VI-C Evaluation Aspects ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-C</span>2 </span>Required Abilities</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS4" title="VI-D Evaluation Benchmarks and Tools ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-D</span> </span><span class="ltx_text ltx_font_italic">Evaluation Benchmarks and Tools</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7" title="VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">VII </span><span class="ltx_text ltx_font_smallcaps">Discussion and Future Prospects</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS1" title="VII-A RAG vs Long Context ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-A</span> </span><span class="ltx_text ltx_font_italic">RAG vs Long Context</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS2" title="VII-B RAG Robustness ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-B</span> </span><span class="ltx_text ltx_font_italic">RAG Robustness</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS3" title="VII-C Hybrid Approaches ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-C</span> </span><span class="ltx_text ltx_font_italic">Hybrid Approaches </span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS4" title="VII-D Scaling laws of RAG ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-D</span> </span><span class="ltx_text ltx_font_italic">Scaling laws of RAG </span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS5" title="VII-E Production-Ready RAG ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-E</span> </span><span class="ltx_text ltx_font_italic">Production-Ready RAG</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS6" title="VII-F Multi-modal RAG ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-F</span> </span><span class="ltx_text ltx_font_italic">Multi-modal RAG</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S8" title="VIII Conclusion ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">VIII </span><span class="ltx_text ltx_font_smallcaps">Conclusion</span></span></a></li>
</ol></nav>
</nav>
<div class="ltx_page_main">
<div class="ltx_page_content"><div class="section" id="target-section"><div id="license-tr">License: arXiv.org perpetual non-exclusive license</div><div id="watermark-tr">arXiv:2312.10997v5 [cs.CL] 27 Mar 2024</div></div>
<article class="ltx_document ltx_authors_1line">
<h1 class="ltx_title ltx_title_document">Retrieval-Augmented Generation for Large Language Models: A Survey</h1>
<div class="ltx_authors">
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yunfan Gao
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yun Xiong
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Xinyu Gao
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Kangxiang Jia
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Jinliu Pan
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yuxi Bi
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">College of Design and Innovation, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yi Dai
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Jiawei Sun
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Meng Wang
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">College of Design and Innovation, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Haofen Wang
</span><span class="ltx_author_notes">Corresponding Author.Email:<a class="ltx_ref ltx_url ltx_font_typewriter" href="[email protected]" title="">[email protected]</a>
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span>
<span class="ltx_contact ltx_role_affiliation">College of Design and Innovation, Tongji University
</span></span></span>
</div>
<div class="ltx_abstract">
<h6 class="ltx_title ltx_title_abstract">Abstract</h6>
<p class="ltx_p" id="id1.id1">Large Language Models (LLMs) showcase impressive capabilities but encounter challenges like hallucination, outdated knowledge, and non-transparent, untraceable reasoning processes. Retrieval-Augmented Generation (RAG) has emerged as a promising solution by incorporating knowledge from external databases. This enhances the accuracy and credibility of the generation, particularly for knowledge-intensive tasks, and allows for continuous knowledge updates and integration of domain-specific information. RAG synergistically merges LLMs’ intrinsic knowledge with the vast, dynamic repositories of external databases. This comprehensive review paper offers a detailed examination of the progression of RAG paradigms, encompassing the Naive RAG, the Advanced RAG, and the Modular RAG. It meticulously scrutinizes the tripartite foundation of RAG frameworks, which includes the retrieval, the generation and the augmentation techniques. The paper highlights the state-of-the-art technologies embedded in each of these critical components, providing a profound understanding of the advancements in RAG systems. Furthermore, this paper introduces up-to-date evaluation framework and benchmark. At the end, this article delineates the challenges currently faced and points out prospective avenues for research and development <span class="ltx_note ltx_role_footnote" id="footnote1"><sup class="ltx_note_mark">1</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">1</sup><span class="ltx_tag ltx_tag_note">1</span>Resources are available at <a class="ltx_ref ltx_url ltx_font_typewriter" href="https://github.com/Tongji-KGLLM/RAG-Survey" title="">https://github.com/Tongji-KGLLM/RAG-Survey</a> </span></span></span>.</p>
</div>
<div class="ltx_keywords">
<h6 class="ltx_title ltx_title_keywords">Index Terms: </h6>
Large language model, retrieval-augmented generation, natural language processing, information retrieval
</div>
<section class="ltx_section" id="S1">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">I </span><span class="ltx_text ltx_font_smallcaps" id="S1.1.1">Introduction</span>
</h2>
<div class="ltx_para" id="S1.p1">
<p class="ltx_p" id="S1.p1.1">Large language models (LLMs) have achieved remarkable success, though they still face significant limitations, especially in domain-specific or knowledge-intensive tasks <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib1" title="">1</a>]</cite>, notably producing “hallucinations” <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib2" title="">2</a>]</cite> when handling queries beyond their training data or requiring current information. To overcome challenges, Retrieval-Augmented Generation (RAG) enhances LLMs by retrieving relevant document chunks from external knowledge base through semantic similarity calculation. By referencing external knowledge, RAG effectively reduces the problem of generating factually incorrect content. Its integration into LLMs has resulted in widespread adoption, establishing RAG as a key technology in advancing chatbots and enhancing the suitability of LLMs for real-world applications.</p>
</div>
<figure class="ltx_figure" id="S1.F1"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="366" id="S1.F1.g1" src="extracted/5498883/images/rag_tech_tree.png" width="509"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 1: </span>Technology tree of RAG research. The stages of involving RAG mainly include pre-training, fine-tuning, and inference. With the emergence of LLMs, research on RAG initially focused on leveraging the powerful in context learning abilities of LLMs, primarily concentrating on the inference stage. Subsequent research has delved deeper, gradually integrating more with the fine-tuning of LLMs. Researchers have also been exploring ways to enhance language models in the pre-training stage through retrieval-augmented techniques.</figcaption>
</figure>
<div class="ltx_para" id="S1.p2">
<p class="ltx_p" id="S1.p2.1">RAG technology has rapidly developed in recent years, and the technology tree summarizing related research is shown in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S1.F1" title="Figure 1 ‣ I Introduction ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">1</span></a>. The development trajectory of RAG in the era of large models exhibits several distinct stage characteristics. Initially, RAG’s inception coincided with the rise of the Transformer architecture, focusing on enhancing language models by incorporating additional knowledge through Pre-Training Models (PTM). This early stage was characterized by foundational work aimed at refining pre-training techniques<cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib3" title="">3</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib4" title="">4</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib5" title="">5</a>]</cite>.The subsequent arrival of ChatGPT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib6" title="">6</a>]</cite> marked a pivotal moment, with LLM demonstrating powerful in context learning (ICL) capabilities. RAG research shifted towards providing better information for LLMs to answer more complex and knowledge-intensive tasks during the inference stage, leading to rapid development in RAG studies. As research progressed, the enhancement of RAG was no longer limited to the inference stage but began to incorporate more with LLM fine-tuning techniques.
</p>
</div>
<div class="ltx_para" id="S1.p3">
<p class="ltx_p" id="S1.p3.1">The burgeoning field of RAG has experienced swift growth, yet it has not been accompanied by a systematic synthesis that could clarify its broader trajectory. This survey endeavors to fill this gap by mapping out the RAG process and charting its evolution and anticipated future paths, with a focus on the integration of RAG within LLMs. This paper considers both technical paradigms and research methods, summarizing three main research paradigms from over 100 RAG studies, and analyzing key technologies in the core stages of “Retrieval,” “Generation,” and “Augmentation.” On the other hand, current research tends to focus more on methods, lacking analysis and summarization of how to evaluate RAG. This paper comprehensively reviews the downstream tasks, datasets, benchmarks, and evaluation methods applicable to RAG. Overall, this paper sets out to meticulously compile and categorize the foundational technical concepts, historical progression, and the spectrum of RAG methodologies and applications that have emerged post-LLMs. It is designed to equip readers and professionals with a detailed and structured understanding of both large models and RAG. It aims to illuminate the evolution of retrieval augmentation techniques, assess the strengths and weaknesses of various approaches in their respective contexts, and speculate on upcoming trends and innovations.</p>
</div>
<div class="ltx_para" id="S1.p4">
<p class="ltx_p" id="S1.p4.1">Our contributions are as follows:
</p>
<ul class="ltx_itemize" id="S1.I1">
<li class="ltx_item" id="S1.I1.i1" style="list-style-type:none;">
<span class="ltx_tag ltx_tag_item">•</span>
<div class="ltx_para" id="S1.I1.i1.p1">
<p class="ltx_p" id="S1.I1.i1.p1.1">In this survey, we present a thorough and systematic review of the state-of-the-art RAG methods, delineating its evolution through paradigms including naive RAG, advanced RAG, and modular RAG. This review contextualizes the broader scope of RAG research within the landscape of LLMs.</p>
</div>
</li>
<li class="ltx_item" id="S1.I1.i2" style="list-style-type:none;">
<span class="ltx_tag ltx_tag_item">•</span>
<div class="ltx_para" id="S1.I1.i2.p1">
<p class="ltx_p" id="S1.I1.i2.p1.1">We identify and discuss the central technologies integral to the RAG process, specifically focusing on the aspects of “Retrieval”, “Generation” and “Augmentation”, and delve into their synergies, elucidating how these components intricately collaborate to form a cohesive and effective RAG framework.</p>
</div>
</li>
<li class="ltx_item" id="S1.I1.i3" style="list-style-type:none;">
<span class="ltx_tag ltx_tag_item">•</span>
<div class="ltx_para" id="S1.I1.i3.p1">
<p class="ltx_p" id="S1.I1.i3.p1.1">We have summarized the current assessment methods of RAG, covering 26 tasks, nearly 50 datasets, outlining the evaluation objectives and metrics, as well as the current evaluation benchmarks and tools. Additionally, we anticipate future directions for RAG, emphasizing potential enhancements to tackle current challenges.</p>
</div>
</li>
</ul>
</div>
<div class="ltx_para" id="S1.p5">
<p class="ltx_p" id="S1.p5.1">The paper unfolds as follows: Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2" title="II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">II</span></a> introduces the main concept and current paradigms of RAG. The following three sections explore core components—“Retrieval”, “Generation” and “Augmentation”, respectively.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3" title="III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">III</span></a> focuses on optimization methods in retrieval,including indexing, query and embedding optimization.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4" title="IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">IV</span></a> concentrates on post-retrieval process and LLM fine-tuning in generation.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5" title="V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">V</span></a> analyzes the three augmentation processes.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6" title="VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">VI</span></a> focuses on RAG’s downstream tasks and evaluation system. Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7" title="VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">VII</span></a> mainly discusses the challenges that RAG currently faces and its future development directions. At last, the paper concludes in Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S8" title="VIII Conclusion ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">VIII</span></a>.</p>
</div>
</section>
<section class="ltx_section" id="S2">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">II </span><span class="ltx_text ltx_font_smallcaps" id="S2.1.1">Overview of RAG </span>
</h2>
<div class="ltx_para" id="S2.p1">
<p class="ltx_p" id="S2.p1.1">A typical application of RAG is illustrated in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.F2" title="Figure 2 ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">2</span></a>. Here, a user poses a question to ChatGPT about a recent, widely discussed news. Given ChatGPT’s reliance on pre-training data, it initially lacks the capacity to provide updates on recent developments. RAG bridges this information gap by sourcing and incorporating knowledge from external databases. In this case, it gathers relevant news articles related to the user’s query. These articles, combined with the original question, form a comprehensive prompt that empowers LLMs to generate a well-informed answer.</p>
</div>
<figure class="ltx_figure" id="S2.F2"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="301" id="S2.F2.g1" src="extracted/5498883/images/RAG_case.png" width="509"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 2: </span>A representative instance of the RAG process applied to question answering. It mainly consists of 3 steps. 1) Indexing. Documents are split into chunks, encoded into vectors, and stored in a vector database. 2) Retrieval. Retrieve the Top k chunks most relevant to the question based on semantic similarity. 3) Generation. Input the original question and the retrieved chunks together into LLM to generate the final answer.</figcaption>
</figure>
<div class="ltx_para" id="S2.p2">
<p class="ltx_p" id="S2.p2.1">The RAG research paradigm is continuously evolving, and we categorize it into three stages: Naive RAG, Advanced RAG, and Modular RAG, as showed in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.F3" title="Figure 3 ‣ II-B Advanced RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">3</span></a>. Despite RAG method are cost-effective and surpass the performance of the native LLM, they also exhibit several limitations. The development of Advanced RAG and Modular RAG is a response to these specific shortcomings in Naive RAG.</p>
</div>
<section class="ltx_subsection" id="S2.SS1">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS1.5.1.1">II-A</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS1.6.2">Naive RAG</span>
</h3>
<div class="ltx_para" id="S2.SS1.p1">
<p class="ltx_p" id="S2.SS1.p1.1">The Naive RAG research paradigm represents the earliest methodology, which gained prominence shortly after the widespread adoption of ChatGPT. The Naive RAG follows a traditional process that includes indexing, retrieval, and generation, which is also characterized as a “Retrieve-Read” framework <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>]</cite>.</p>
</div>
<div class="ltx_para" id="S2.SS1.p2">
<p class="ltx_p" id="S2.SS1.p2.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p2.1.1">Indexing</em> starts with the cleaning and extraction of raw data in diverse formats like PDF, HTML, Word, and Markdown, which is then converted into a uniform plain text format. To accommodate the context limitations of language models, text is segmented into smaller, digestible chunks. Chunks are then encoded into vector representations using an embedding model and stored in vector database. This step is crucial for enabling efficient similarity searches in the subsequent retrieval phase.</p>
</div>
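<div class="ltx_para">
<p class="ltx_p">As a minimal illustration of this indexing step, the Python sketch below splits plain text into overlapping chunks, embeds them, and keeps the vectors in a NumPy array standing in for a vector database. The chunk size, the overlap, and the sentence-transformers model name are illustrative assumptions rather than choices prescribed by the survey.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Indexing sketch: split cleaned text into overlapping chunks, encode the
# chunks with an embedding model, and keep the unit-normalized vectors in a
# NumPy array standing in for a vector database. All parameters are examples.
import numpy as np
from sentence_transformers import SentenceTransformer

def chunk_text(text, chunk_size=500, overlap=50):
    """Split plain text into fixed-size, overlapping character chunks."""
    step = chunk_size - overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

def build_index(documents, model):
    """Chunk every document and embed all chunks with the same model."""
    chunks = [c for doc in documents for c in chunk_text(doc)]
    vectors = model.encode(chunks)                       # shape: (num_chunks, dim)
    vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    return chunks, vectors

embedder = SentenceTransformer("all-MiniLM-L6-v2")       # example embedding model
chunks, index = build_index(["plain text of document one",
                             "plain text of document two"], embedder)
</pre>
</div>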
<div class="ltx_para" id="S2.SS1.p3">
<p class="ltx_p" id="S2.SS1.p3.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p3.1.1">Retrieval</em>. Upon receipt of a user query, the RAG system employs the same encoding model utilized during the indexing phase to transform the query into a vector representation. It then computes the similarity scores between the query vector and the vector of chunks within the indexed corpus. The system prioritizes and retrieves the top K chunks that demonstrate the greatest similarity to the query. These chunks are subsequently used as the expanded context in prompt.</p>
</div>
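<div class="ltx_para">
<p class="ltx_p">A corresponding retrieval sketch, reusing the embedder and the in-memory index from the indexing sketch above: the query is encoded with the same model and the top-K chunks by cosine similarity are returned. The value of K and the dot-product scoring over unit-normalized vectors are illustrative choices.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Retrieval sketch: encode the query with the same embedding model used at
# indexing time and return the top-K chunks by cosine similarity.
import numpy as np

def retrieve(query, chunks, index, model, k=3):
    q = model.encode([query])
    q = q / np.linalg.norm(q, axis=1, keepdims=True)
    scores = index @ q[0]                  # cosine similarity (vectors are unit norm)
    top = np.argsort(scores)[::-1][:k]     # indices of the K most similar chunks
    return [chunks[i] for i in top]

top_chunks = retrieve("What does document one describe?", chunks, index, embedder)
</pre>
</div>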
<div class="ltx_para" id="S2.SS1.p4">
<p class="ltx_p" id="S2.SS1.p4.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p4.1.1">Generation</em>. The posed query and selected documents are synthesized into a coherent prompt to which a large language model is tasked with formulating a response. The model’s approach to answering may vary depending on task-specific criteria, allowing it to either draw upon its inherent parametric knowledge or restrict its responses to the information contained within the provided documents. In cases of ongoing dialogues, any existing conversational history can be integrated into the prompt, enabling the model to engage in multi-turn dialogue interactions effectively.</p>
</div>
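<div class="ltx_para">
<p class="ltx_p">A sketch of prompt assembly for this generation step. The prompt template is an assumption, and <span class="ltx_text ltx_font_typewriter">llm</span> stands for any callable that maps a prompt string to a completion string; the survey does not prescribe a particular model API.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Generation sketch: combine the question and the retrieved chunks into one
# prompt and hand it to a language model. `llm` is a placeholder callable that
# maps a prompt string to a completion string; the template is illustrative.
def generate_answer(llm, query, retrieved_chunks):
    context = "\n\n".join(retrieved_chunks)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )
    return llm(prompt)
</pre>
</div>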
<div class="ltx_para" id="S2.SS1.p5">
<p class="ltx_p" id="S2.SS1.p5.1">However, Naive RAG encounters notable drawbacks:</p>
</div>
<div class="ltx_para" id="S2.SS1.p6">
<p class="ltx_p" id="S2.SS1.p6.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p6.1.1">Retrieval Challenges</em>. The retrieval phase often struggles with precision and recall, leading to the selection of misaligned or irrelevant chunks, and the missing of crucial information.</p>
</div>
<div class="ltx_para" id="S2.SS1.p7">
<p class="ltx_p" id="S2.SS1.p7.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p7.1.1">Generation Difficulties</em>. In generating responses, the model may face the issue of hallucination, where it produces content not supported by the retrieved context. This phase can also suffer from irrelevance, toxicity, or bias in the outputs, detracting from the quality and reliability of the responses.</p>
</div>
<div class="ltx_para" id="S2.SS1.p8">
<p class="ltx_p" id="S2.SS1.p8.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p8.1.1">Augmentation Hurdles</em>. Integrating retrieved information with the different task can be challenging, sometimes resulting in disjointed or incoherent outputs. The process may also encounter redundancy when similar information is retrieved from multiple sources, leading to repetitive responses. Determining the significance and relevance of various passages and ensuring stylistic and tonal consistency add further complexity. Facing complex issues, a single retrieval based on the original query may not suffice to acquire adequate context information.</p>
</div>
<div class="ltx_para" id="S2.SS1.p9">
<p class="ltx_p" id="S2.SS1.p9.1">Moreover, there’s a concern that generation models might overly rely on augmented information, leading to outputs that simply echo retrieved content without adding insightful or synthesized information.</p>
</div>
</section>
<section class="ltx_subsection" id="S2.SS2">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS2.5.1.1">II-B</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS2.6.2">Advanced RAG</span>
</h3>
<div class="ltx_para" id="S2.SS2.p1">
<p class="ltx_p" id="S2.SS2.p1.1">Advanced RAG introduces specific improvements to overcome the limitations of Naive RAG. Focusing on enhancing retrieval quality, it employs pre-retrieval and post-retrieval strategies. To tackle the indexing issues, Advanced RAG refines its indexing techniques through the use of a sliding window approach, fine-grained segmentation, and the incorporation of metadata. Additionally, it incorporates several optimization methods to streamline the retrieval process<cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib8" title="">8</a>]</cite>.
</p>
</div>
<div class="ltx_para" id="S2.SS2.p2">
<p class="ltx_p" id="S2.SS2.p2.1"><em class="ltx_emph ltx_font_italic" id="S2.SS2.p2.1.1">Pre-retrieval process</em>. In this stage, the primary focus is on optimizing the indexing structure and the original query. The goal of optimizing indexing is to enhance the quality of the content being indexed. This involves strategies: enhancing data granularity, optimizing index structures, adding metadata, alignment optimization, and mixed retrieval. While the goal of query optimization is to make the user’s original question clearer and more suitable for the retrieval task. Common methods include query rewriting query transformation, query expansion and other techniques <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib9" title="">9</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib10" title="">10</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib11" title="">11</a>]</cite>.</p>
</div>
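<div class="ltx_para">
<p class="ltx_p">As one hedged illustration of query optimization, the sketch below expands the user question into paraphrased variants with an LLM and merges the chunks retrieved for each variant, reusing the retrieve helper from the earlier sketch. The prompt wording and the simple order-preserving deduplication are assumptions, not a method defined in the cited works.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Query-expansion sketch: ask an LLM for paraphrases of the question, retrieve
# chunks for each variant, and merge the results. `llm` is a placeholder
# callable from prompt to text; retrieve() is the function sketched earlier.
def expand_query(llm, query, n=3):
    prompt = (
        f"Rewrite the following question in {n} different ways, one per line, "
        f"keeping its meaning unchanged:\n{query}"
    )
    variants = [line.strip() for line in llm(prompt).splitlines() if line.strip()]
    return [query] + variants[:n]

def retrieve_with_expansion(llm, query, chunks, index, model, k=3):
    merged, seen = [], set()
    for q in expand_query(llm, query):
        for chunk in retrieve(q, chunks, index, model, k):
            if chunk not in seen:          # keep each chunk once, in rank order
                seen.add(chunk)
                merged.append(chunk)
    return merged
</pre>
</div>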
<div class="ltx_para" id="S2.SS2.p3">
<p class="ltx_p" id="S2.SS2.p3.1"><em class="ltx_emph ltx_font_italic" id="S2.SS2.p3.1.1">Post-Retrieval Process</em>. Once relevant context is retrieved, it’s crucial to integrate it effectively with the query. The main methods in post-retrieval process include rerank chunks and context compressing. Re-ranking the retrieved information to relocate the most relevant content to the edges of the prompt is a key strategy. This concept has been implemented in frameworks such as LlamaIndex<span class="ltx_note ltx_role_footnote" id="footnote2"><sup class="ltx_note_mark">2</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">2</sup><span class="ltx_tag ltx_tag_note">2</span><a class="ltx_ref ltx_url ltx_font_typewriter" href="https://www.llamaindex.ai" title="">https://www.llamaindex.ai</a></span></span></span>, LangChain<span class="ltx_note ltx_role_footnote" id="footnote3"><sup class="ltx_note_mark">3</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">3</sup><span class="ltx_tag ltx_tag_note">3</span><a class="ltx_ref ltx_url ltx_font_typewriter" href="https://www.langchain.com/" title="">https://www.langchain.com/</a></span></span></span>, and HayStack <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib12" title="">12</a>]</cite>. Feeding all relevant documents directly into LLMs can lead to information overload, diluting the focus on key details with irrelevant content.To mitigate this, post-retrieval efforts concentrate on selecting the essential information, emphasizing critical sections, and shortening the context to be processed.
</p>
</div>
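<div class="ltx_para">
<p class="ltx_p">The sketch below illustrates the reranking side of post-retrieval processing: a cross-encoder rescores query-chunk pairs and only the highest-scoring chunks are kept, shortening the context handed to the LLM. The specific cross-encoder model and the fixed cut-off are illustrative assumptions rather than choices made by the frameworks cited above.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Reranking sketch: rescore each (query, chunk) pair with a cross-encoder and
# keep only the highest-scoring chunks. Model name and cut-off are examples.
import numpy as np
from sentence_transformers import CrossEncoder

def rerank(query, candidate_chunks, keep=3):
    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    scores = reranker.predict([(query, c) for c in candidate_chunks])
    order = np.argsort(scores)[::-1][:keep]
    return [candidate_chunks[i] for i in order]
</pre>
</div>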
<figure class="ltx_figure" id="S2.F3"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="292" id="S2.F3.g1" src="extracted/5498883/images/RAG_FrameCompre_eng.png" width="480"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 3: </span>Comparison between the three paradigms of RAG. (Left) Naive RAG mainly consists of three parts: indexing, retrieval and generation. (Middle) Advanced RAG proposes multiple optimization strategies around pre-retrieval and post-retrieval, with a process similar to the Naive RAG, still following a chain-like structure. (Right) Modular RAG inherits and develops from the previous paradigm, showcasing greater flexibility overall. This is evident in the introduction of multiple specific functional modules and the replacement of existing modules. The overall process is not limited to sequential retrieval and generation; it includes methods such as iterative and adaptive retrieval.</figcaption>
</figure>
</section>
<section class="ltx_subsection" id="S2.SS3">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS3.5.1.1">II-C</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS3.6.2">Modular RAG</span>
</h3>
<div class="ltx_para" id="S2.SS3.p1">
<p class="ltx_p" id="S2.SS3.p1.1">The modular RAG architecture advances beyond the former two RAG paradigms, offering enhanced adaptability and versatility. It incorporates diverse strategies for improving its components, such as adding a search module for similarity searches and refining the retriever through fine-tuning. Innovations like restructured RAG modules <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite> and rearranged RAG pipelines <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib14" title="">14</a>]</cite> have been introduced to tackle specific challenges. The shift towards a modular RAG approach is becoming prevalent, supporting both sequential processing and integrated end-to-end training across its components. Despite its distinctiveness, Modular RAG builds upon the foundational principles of Advanced and Naive RAG, illustrating a progression and refinement within the RAG family.</p>
</div>
<section class="ltx_subsubsection" id="S2.SS3.SSS1">
<h4 class="ltx_title ltx_title_subsubsection">
<span class="ltx_tag ltx_tag_subsubsection"><span class="ltx_text" id="S2.SS3.SSS1.5.1.1">II-C</span>1 </span>New Modules</h4>
<div class="ltx_para" id="S2.SS3.SSS1.p1">
<p class="ltx_p" id="S2.SS3.SSS1.p1.1">The Modular RAG framework introduces additional specialized components to enhance retrieval and processing capabilities. The Search module adapts to specific scenarios, enabling direct searches across various data sources like search engines, databases, and knowledge graphs, using LLM-generated code and query languages <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib15" title="">15</a>]</cite>. RAG-Fusion addresses traditional search limitations by employing a multi-query strategy that expands user queries into diverse perspectives, utilizing parallel vector searches and intelligent re-ranking to uncover both explicit and transformative knowledge <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib16" title="">16</a>]</cite>. The Memory module leverages the LLM’s memory to guide retrieval, creating an unbounded memory pool that aligns the text more closely with data distribution through iterative self-enhancement <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib17" title="">17</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib18" title="">18</a>]</cite>. Routing in the RAG system navigates through diverse data sources, selecting the optimal pathway for a query, whether it involves summarization, specific database searches, or merging different information streams <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib19" title="">19</a>]</cite>. The Predict module aims to reduce redundancy and noise by generating context directly through the LLM, ensuring relevance and accuracy <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite>. Lastly, the Task Adapter module tailors RAG to various downstream tasks, automating prompt retrieval for zero-shot inputs and creating task-specific retrievers through few-shot query generation <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib20" title="">20</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib21" title="">21</a>]</cite> .This comprehensive approach not only streamlines the retrieval process but also significantly improves the quality and relevance of the information retrieved, catering to a wide array of tasks and queries with enhanced precision and flexibility.</p>
</div>
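<div class="ltx_para">
<p class="ltx_p">To make the multi-query idea behind RAG-Fusion concrete, the sketch below fuses the rankings returned for several reformulations of a question using reciprocal rank fusion. It is a minimal illustration rather than the reference implementation of <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib16" title="">16</a>]</cite>; <span class="ltx_text ltx_font_typewriter">expand_query</span> and <span class="ltx_text ltx_font_typewriter">vector_search</span> are assumed callables standing in for an LLM-based query expander and a vector store.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Minimal sketch of a RAG-Fusion-style multi-query retrieval step (illustrative only).
def reciprocal_rank_fusion(result_lists, k=60):
    """Merge several ranked lists of document ids into one fused ranking."""
    scores = {}
    for results in result_lists:
        for rank, doc_id in enumerate(results):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank + 1)
    return sorted(scores, key=scores.get, reverse=True)

def rag_fusion_retrieve(question, expand_query, vector_search, n_queries=4, top_k=5):
    # 1. Expand the user question into several reformulations (assumed LLM call).
    queries = [question] + expand_query(question, n=n_queries - 1)
    # 2. Run a vector search for every reformulation (assumed retriever API).
    ranked_lists = [vector_search(q, top_k=top_k) for q in queries]
    # 3. Re-rank with reciprocal rank fusion and keep the top results.
    return reciprocal_rank_fusion(ranked_lists)[:top_k]
</pre>
</div>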
</section>
<section class="ltx_subsubsection" id="S2.SS3.SSS2">
<h4 class="ltx_title ltx_title_subsubsection">
<span class="ltx_tag ltx_tag_subsubsection"><span class="ltx_text" id="S2.SS3.SSS2.5.1.1">II-C</span>2 </span>New Patterns</h4>
<div class="ltx_para" id="S2.SS3.SSS2.p1">
<p class="ltx_p" id="S2.SS3.SSS2.p1.1">Modular RAG offers remarkable adaptability by allowing module substitution or reconfiguration to address specific challenges. This goes beyond the fixed structures of Naive and Advanced RAG, characterized by a simple “Retrieve” and “Read” mechanism. Moreover, Modular RAG expands this flexibility by integrating new modules or adjusting interaction flow among existing ones, enhancing its applicability across different tasks.</p>
</div>
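<div class="ltx_para">
<p class="ltx_p">As a purely illustrative sketch (not an API defined in any of the surveyed works), the snippet below treats each RAG module as an interchangeable callable over a shared state, so a pipeline can be reconfigured, or a new module slotted in, by editing a list rather than rewriting the system.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Hypothetical illustration of module substitution in a Modular RAG pipeline.
def run_pipeline(question, modules):
    """Apply the configured modules in order over a shared state dict."""
    state = {"question": question, "query": question, "docs": [], "answer": None}
    for module in modules:
        state = module(state)
    return state["answer"]

# Example configurations, where each stage is a user-supplied function that
# takes and returns the shared state dict:
#   naive_rag    = [retrieve, read]
#   advanced_rag = [rewrite, retrieve, rerank, read]
</pre>
</div>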
<div class="ltx_para" id="S2.SS3.SSS2.p2">
<p class="ltx_p" id="S2.SS3.SSS2.p2.1">Innovations such as the Rewrite-Retrieve-Read <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>]</cite>model leverage the LLM’s capabilities to refine retrieval queries through a rewriting module and a LM-feedback mechanism to update rewriting model., improving task performance. Similarly, approaches like Generate-Read <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite> replace traditional retrieval with LLM-generated content, while Recite-Read <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib22" title="">22</a>]</cite> emphasizes retrieval from model weights, enhancing the model’s ability to handle knowledge-intensive tasks. Hybrid retrieval strategies integrate keyword, semantic, and vector searches to cater to diverse queries. Additionally, employing sub-queries and hypothetical document embeddings (HyDE) <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib11" title="">11</a>]</cite> seeks to improve retrieval relevance by focusing on embedding similarities between generated answers and real documents.</p>
</div>
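<div class="ltx_para">
<p class="ltx_p">The sketch below illustrates the control flow of HyDE-style retrieval <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib11" title="">11</a>]</cite> under stated assumptions: <span class="ltx_text ltx_font_typewriter">generate_hypothetical_doc</span>, <span class="ltx_text ltx_font_typewriter">embed</span>, and <span class="ltx_text ltx_font_typewriter">index</span> are assumed stand-ins for an LLM, an embedding model, and a vector index. The point is only that the search uses the embedding of a hypothetical answer rather than of the raw question.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Minimal sketch of HyDE-style retrieval (illustrative only).
def hyde_retrieve(question, generate_hypothetical_doc, embed, index, top_k=5):
    hypothetical_doc = generate_hypothetical_doc(question)  # LLM drafts a plausible answer
    query_vector = embed(hypothetical_doc)                   # embed the draft, not the question
    return index.search(query_vector, top_k=top_k)           # nearest real documents
</pre>
</div>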
<div class="ltx_para" id="S2.SS3.SSS2.p3">
<p class="ltx_p" id="S2.SS3.SSS2.p3.1">Adjustments in module arrangement and interaction, such as the Demonstrate-Search-Predict (DSP) <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib23" title="">23</a>]</cite> framework and the iterative Retrieve-Read-Retrieve-Read flow of ITER-RETGEN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib14" title="">14</a>]</cite>, showcase the dynamic use of module outputs to bolster another module’s functionality, illustrating a sophisticated understanding of enhancing module synergy. The flexible orchestration of Modular RAG Flow showcases the benefits of adaptive retrieval through techniques such as FLARE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib24" title="">24</a>]</cite> and Self-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib25" title="">25</a>]</cite>. This approach transcends the fixed RAG retrieval process by evaluating the necessity of retrieval based on different scenarios. Another benefit of a flexible architecture is that the RAG system can more easily integrate with other technologies (such as fine-tuning or reinforcement learning) <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib26" title="">26</a>]</cite>. For example, this can involve fine-tuning the retriever for better retrieval results, fine-tuning the generator for more personalized outputs, or engaging in collaborative fine-tuning <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib27" title="">27</a>]</cite>.</p>
</div>
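<div class="ltx_para">
<p class="ltx_p">The following sketch captures the spirit of adaptive retrieval in a FLARE-like loop <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib24" title="">24</a>]</cite>: retrieval is triggered only when the generator is not confident about its next sentence. It is a simplified, assumption-laden illustration (the actual method operates on token probabilities); <span class="ltx_text ltx_font_typewriter">generate_next_sentence</span>, <span class="ltx_text ltx_font_typewriter">confidence</span>, and <span class="ltx_text ltx_font_typewriter">retrieve</span> are assumed callables.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Minimal sketch of an adaptive (retrieve-only-when-needed) generation loop.
def adaptive_generate(question, generate_next_sentence, confidence, retrieve,
                      max_sentences=10, threshold=0.8):
    context, answer = [], []
    for _ in range(max_sentences):
        draft = generate_next_sentence(question, context, answer)
        if not draft:                          # generator signals completion
            break
        if confidence(draft) >= threshold:     # confident enough: keep the sentence
            answer.append(draft)
            continue
        context.extend(retrieve(draft))        # otherwise retrieve with the draft as query
        regenerated = generate_next_sentence(question, context, answer)
        answer.append(regenerated if regenerated else draft)
    return " ".join(answer)
</pre>
</div>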
</section>
</section>
<section class="ltx_subsection" id="S2.SS4">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS4.5.1.1">II-D</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS4.6.2">RAG vs Fine-tuning</span>
</h3>
<div class="ltx_para" id="S2.SS4.p1">
<p class="ltx_p" id="S2.SS4.p1.1">The augmentation of LLMs has attracted considerable attention due to their growing prevalence. Among the optimization methods for LLMs, RAG is often compared with Fine-tuning (FT) and prompt engineering. Each method has distinct characteristics as illustrated in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.F4" title="Figure 4 ‣ II-D RAG vs Fine-tuning ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">4</span></a>. We used a quadrant chart to illustrate the differences among three methods in two dimensions: external knowledge requirements and model adaption requirements. Prompt engineering leverages a model’s inherent capabilities with minimum necessity for external knowledge and model adaption. RAG can be likened to providing a model with a tailored textbook for information retrieval, ideal for precise information retrieval tasks. In contrast, FT is comparable to a student internalizing knowledge over time, suitable for scenarios requiring replication of specific structures, styles, or formats.</p>
</div>
<div class="ltx_para" id="S2.SS4.p2">
<p class="ltx_p" id="S2.SS4.p2.1">RAG excels in dynamic environments by offering real-time knowledge updates and effective utilization of external knowledge sources with high interpretability. However, it comes with higher latency and ethical considerations regarding data retrieval. On the other hand, FT is more static, requiring retraining for updates but enabling deep customization of the model’s behavior and style. It demands significant computational resources for dataset preparation and training, and while it can reduce hallucinations, it may face challenges with unfamiliar data.</p>
</div>
<div class="ltx_para" id="S2.SS4.p3">
<p class="ltx_p" id="S2.SS4.p3.1">In multiple evaluations of their performance on various knowledge-intensive tasks across different topics, <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib28" title="">28</a>]</cite> revealed that while unsupervised fine-tuning shows some improvement, RAG consistently outperforms it, for both existing knowledge encountered during training and entirely new knowledge. Additionally, it was found that LLMs struggle to learn new factual information through unsupervised fine-tuning. The choice between RAG and FT depends on the specific needs for data dynamics, customization, and computational capabilities in the application context. RAG and FT are not mutually exclusive and can complement each other, enhancing a model’s capabilities at different levels. In some instances, their combined use may lead to optimal performance. The optimization process involving RAG and FT may require multiple iterations to achieve satisfactory results.</p>
</div>
<figure class="ltx_figure" id="S2.F4"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="280" id="S2.F4.g1" src="extracted/5498883/images/rag_FT.png" width="479"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 4: </span>RAG compared with other model optimization methods in the aspects of “External Knowledge Required” and “Model Adaption Required”. Prompt Engineering requires low modifications to the model and external knowledge, focusing on harnessing the capabilities of LLMs themselves. Fine-tuning, on the other hand, involves further training the model. In the early stages of RAG (Naive RAG), there is a low demand for model modifications. As research progresses, Modular RAG has become more integrated with fine-tuning techniques.</figcaption>
</figure>
<figure class="ltx_table" id="S2.T1">
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_table">TABLE I: </span>Summary of RAG methods</figcaption>
<div class="ltx_inline-block ltx_align_center ltx_transformed_outer" id="S2.T1.1" style="width:488.0pt;height:1056.3pt;vertical-align:-0.8pt;"><span class="ltx_transformed_inner" style="transform:translate(-61.0pt,131.9pt) scale(0.8,0.8) ;">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1">
<tr class="ltx_tr" id="S2.T1.1.1.1">
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.1">Method</td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.2">Retrieval Source</td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.3">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.3.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.3.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.3.1.1.1">Retrieval</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.3.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.3.1.2.1">Data Type</td>
</tr>
</table></td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.4">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.4.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.4.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.4.1.1.1">Retrieval</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.4.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.4.1.2.1">Granularity</td>
</tr>
</table></td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.5">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.5.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.5.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.5.1.1.1">Augmentation</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.5.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.5.1.2.1">Stage</td>
</tr>
</table></td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.6">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.6.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.6.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.6.1.1.1">Retrieval</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.6.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.6.1.2.1">process</td>
</tr>
</table></td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.2">
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.1">CoG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib29" title="">29</a>]</cite>
</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.2">Wikipedia</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.3">Text</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.4">Phrase</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.5">Pre-training</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.3">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.1">DenseX <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib30" title="">30</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.2">FactoidWiki</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.4">Proposition</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.4">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.1">EAR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib31" title="">31</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.5">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.1">UPRISE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib20" title="">20</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.6">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.1">RAST <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib32" title="">32</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.7">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.1">Self-Mem <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib17" title="">17</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.8">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.1">FLARE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib24" title="">24</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.2">Search Engine,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.6">Adaptive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.9">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.1">PGRA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib33" title="">33</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.10">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.1">FILCO <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib34" title="">34</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.11">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.1">RADA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib35" title="">35</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.12">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.1">Filter-rerank <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib36" title="">36</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.2">Synthesized dataset</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.13">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.1">R-GQA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib37" title="">37</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.4">Sentence Pair</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.14">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.1">LLM-R <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib38" title="">38</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.4">Sentence Pair</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.15">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.1">TIGER <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib39" title="">39</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.16">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.1">LM-Indexer <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib40" title="">40</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.17">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.1">BEQUE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib9" title="">9</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.18">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.1">CT-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib41" title="">41</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.2">Synthesized dataset</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.19">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.1">Atlas <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib42" title="">42</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.2">Wikipedia, Common Crawl</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.20">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.1">RAVEN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib43" title="">43</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.21">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.1">RETRO++ <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib44" title="">44</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.2">Pre-training Corpus</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.22">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.1">INSTRUCTRETRO <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib45" title="">45</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.2">Pre-training corpus</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.23">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.1">RRR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.2">Search Engine</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.24">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.1">RA-e2e <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib46" title="">46</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.25">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.1">PROMPTAGATOR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib21" title="">21</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.2">BEIR</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.26">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.1">AAR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib47" title="">47</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.2">MSMARCO,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.27">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.1">RA-DIT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib27" title="">27</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.2">Common Crawl,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.28">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.1">RAG-Robust <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib48" title="">48</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.29">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.1">RA-Long-Form <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib49" title="">49</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.30">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.1">CoN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib50" title="">50</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.31">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.1">Self-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib25" title="">25</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.6">Adaptive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.32">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.1">BGM <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib26" title="">26</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.33">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.1">CoQ <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib51" title="">51</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.34">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.1">Token-Elimination <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib52" title="">52</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.35">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.1">PaperQA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib53" title="">53</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.2">Arxiv,Online Database,PubMed</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.36">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.1">NoiseRAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib54" title="">54</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.2">FactoidWiki</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.37">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.1">IAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib55" title="">55</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.2">Search Engine,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.38">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.1">NoMIRACL <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib56" title="">56</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.39">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.1">ToC <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib57" title="">57</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.2">Search Engine,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.6">Recursive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.40">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.1">SKR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib58" title="">58</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.2">Dataset-base,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.6">Adaptive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.41">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.1">ITRG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib59" title="">59</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.42">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.1">RAG-LongContext <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib60" title="">60</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.43">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.1">ITER-RETGEN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib14" title="">14</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.44">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.1">IRCoT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib61" title="">61</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.6">Recursive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.45">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.1">LLM-Knowledge-Boundary <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib62" title="">62</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.46">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.1">RAPTOR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib63" title="">63</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.6">Recursive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.47">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.1">RECITE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib22" title="">22</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.2">LLMs</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.48">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.1">ICRALM <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib64" title="">64</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.2">Pile,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.49">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.1">Retrieve-and-Sample <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib65" title="">65</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.50">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.1">Zemi <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib66" title="">66</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.2">C4</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.51">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.1">CRAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib67" title="">67</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.2">Arxiv</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.52">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.1">1-PAGER <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib68" title="">68</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.53">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.1">PRCA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib69" title="">69</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.54">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.1">QLM-Doc-ranking <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib70" title="">70</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.55">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.1">Recomp <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib71" title="">71</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.56">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.1">DSP <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib23" title="">23</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.57">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.1">RePLUG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib72" title="">72</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.2">Pile</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.58">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.1">ARM-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib73" title="">73</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.59">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.1">GenRead <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.2">LLMs</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.60">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.1">UniMS-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib74" title="">74</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.4">Multi</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.61">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.1">CREA-ICL <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib19" title="">19</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.3">Crosslingual,Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.62">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.1">PKG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib75" title="">75</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.2">LLM</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.3">Tabular,Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.63">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.1">SANTA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib76" title="">76</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.3">Code,Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.4">Item</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.64">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.1">SURGE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib77" title="">77</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.2">Freebase</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.4">Sub-Graph</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.65">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.1">MK-ToD <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib78" title="">78</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.4">Entity</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.66">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.1">Dual-Feedback-ToD <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib79" title="">79</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.4">Entity Sequence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.67">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.1">KnowledGPT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib15" title="">15</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.4">Triplet</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.6">Muti-time</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.68">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.1">FABULA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib80" title="">80</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.2">Dataset-base,Graph</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.4">Entity</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.69">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.69.1">HyKGE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib81" title="">81</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.69.2">CMeKG</td>