<!DOCTYPE html>
<html lang="en">
<head>
<meta content="text/html; charset=utf-8" http-equiv="content-type"/>
<title>Retrieval-Augmented Generation for Large Language Models: A Survey</title>
<!--Generated on Wed Mar 27 09:16:19 2024 by LaTeXML (version 0.8.7) http://dlmf.nist.gov/LaTeXML/.-->
<meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport"/>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet" type="text/css"/>
<link href="/static/browse/0.3.4/css/ar5iv_0.7.4.min.css" rel="stylesheet" type="text/css"/>
<link href="/static/browse/0.3.4/css/latexml_styles.css" rel="stylesheet" type="text/css"/>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.3.3/html2canvas.min.js"></script>
<script src="/static/browse/0.3.4/js/addons.js"></script>
<script src="/static/browse/0.3.4/js/feedbackOverlay.js"></script>
<meta content="
Large language model, retrieval-augmented generation, natural language processing, information retrieval
" lang="en" name="keywords"/>
<base href="/html/2312.10997v5/"/></head>
<body>
<nav class="ltx_page_navbar">
<nav class="ltx_TOC">
<ol class="ltx_toclist">
<li class="ltx_tocentry ltx_tocentry_section"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S1" title="I Introduction ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">I </span><span class="ltx_text ltx_font_smallcaps">Introduction</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2" title="II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">II </span><span class="ltx_text ltx_font_smallcaps">Overview of RAG </span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS1" title="II-A Naive RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-A</span> </span><span class="ltx_text ltx_font_italic">Naive RAG</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS2" title="II-B Advanced RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-B</span> </span><span class="ltx_text ltx_font_italic">Advanced RAG</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS3" title="II-C Modular RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-C</span> </span><span class="ltx_text ltx_font_italic">Modular RAG</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS3.SSS1" title="II-C1 New Modules ‣ II-C Modular RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-C</span>1 </span>New Modules</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS3.SSS2" title="II-C2 New Patterns ‣ II-C Modular RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-C</span>2 </span>New Patterns</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.SS4" title="II-D RAG vs Fine-tuning ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">II-D</span> </span><span class="ltx_text ltx_font_italic">RAG vs Fine-tuning</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3" title="III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">III </span><span class="ltx_text ltx_font_smallcaps">Retrieval</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS1" title="III-A Retrieval Source ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-A</span> </span><span class="ltx_text ltx_font_italic">Retrieval Source</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS1.SSS1" title="III-A1 Data Structure ‣ III-A Retrieval Source ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-A</span>1 </span>Data Structure</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS1.SSS2" title="III-A2 Retrieval Granularity ‣ III-A Retrieval Source ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-A</span>2 </span>Retrieval Granularity</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2" title="III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span> </span><span class="ltx_text ltx_font_italic">Indexing Optimization</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2.SSS1" title="III-B1 Chunking Strategy ‣ III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span>1 </span>Chunking Strategy</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2.SSS2" title="III-B2 Metadata Attachments ‣ III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span>2 </span>Metadata Attachments</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS2.SSS3" title="III-B3 Structural Index ‣ III-B Indexing Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-B</span>3 </span>Structural Index</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3" title="III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span> </span><span class="ltx_text ltx_font_italic">Query Optimization</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3.SSS1" title="III-C1 Query Expansion ‣ III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span>1 </span>Query Expansion</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3.SSS2" title="III-C2 Query Transformation ‣ III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span>2 </span>Query Transformation</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS3.SSS3" title="III-C3 Query Routing ‣ III-C Query Optimization ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-C</span>3 </span>Query Routing</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS4" title="III-D Embedding ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-D</span> </span><span class="ltx_text ltx_font_italic">Embedding</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS4.SSS1" title="III-D1 Mix/hybrid Retrieval ‣ III-D Embedding ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-D</span>1 </span>Mix/hybrid Retrieval</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS4.SSS2" title="III-D2 Fine-tuning Embedding Model ‣ III-D Embedding ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-D</span>2 </span>Fine-tuning Embedding Model</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3.SS5" title="III-E Adapter ‣ III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">III-E</span> </span><span class="ltx_text ltx_font_italic">Adapter</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4" title="IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">IV </span><span class="ltx_text ltx_font_smallcaps">Generation</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS1" title="IV-A Context Curation ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-A</span> </span><span class="ltx_text ltx_font_italic">Context Curation</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS1.SSS1" title="IV-A1 Reranking ‣ IV-A Context Curation ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-A</span>1 </span>Reranking</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS1.SSS2" title="IV-A2 Context Selection/Compression ‣ IV-A Context Curation ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-A</span>2 </span>Context Selection/Compression</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4.SS2" title="IV-B LLM Fine-tuning ‣ IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">IV-B</span> </span><span class="ltx_text ltx_font_italic">LLM Fine-tuning</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5" title="V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">V </span><span class="ltx_text ltx_font_smallcaps">Augmentation process in RAG</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5.SS1" title="V-A Iterative Retrieval ‣ V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">V-A</span> </span><span class="ltx_text ltx_font_italic">Iterative Retrieval</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5.SS2" title="V-B Recursive Retrieval ‣ V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">V-B</span> </span><span class="ltx_text ltx_font_italic">Recursive Retrieval</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5.SS3" title="V-C Adaptive Retrieval ‣ V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">V-C</span> </span><span class="ltx_text ltx_font_italic">Adaptive Retrieval</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6" title="VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">VI </span><span class="ltx_text ltx_font_smallcaps">Task and Evaluation</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS1" title="VI-A Downstream Task ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-A</span> </span><span class="ltx_text ltx_font_italic">Downstream Task</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS2" title="VI-B Evaluation Target ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-B</span> </span><span class="ltx_text ltx_font_italic">Evaluation Target</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS3" title="VI-C Evaluation Aspects ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-C</span> </span><span class="ltx_text ltx_font_italic">Evaluation Aspects</span></span></a>
<ol class="ltx_toclist ltx_toclist_subsection">
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS3.SSS1" title="VI-C1 Quality Scores ‣ VI-C Evaluation Aspects ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-C</span>1 </span>Quality Scores</span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsubsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS3.SSS2" title="VI-C2 Required Abilities ‣ VI-C Evaluation Aspects ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-C</span>2 </span>Required Abilities</span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6.SS4" title="VI-D Evaluation Benchmarks and Tools ‣ VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VI-D</span> </span><span class="ltx_text ltx_font_italic">Evaluation Benchmarks and Tools</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section">
<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7" title="VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">VII </span><span class="ltx_text ltx_font_smallcaps">Discussion and Future Prospects</span></span></a>
<ol class="ltx_toclist ltx_toclist_section">
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS1" title="VII-A RAG vs Long Context ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-A</span> </span><span class="ltx_text ltx_font_italic">RAG vs Long Context</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS2" title="VII-B RAG Robustness ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-B</span> </span><span class="ltx_text ltx_font_italic">RAG Robustness</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS3" title="VII-C Hybrid Approaches ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-C</span> </span><span class="ltx_text ltx_font_italic">Hybrid Approaches </span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS4" title="VII-D Scaling laws of RAG ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-D</span> </span><span class="ltx_text ltx_font_italic">Scaling laws of RAG </span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS5" title="VII-E Production-Ready RAG ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-E</span> </span><span class="ltx_text ltx_font_italic">Production-Ready RAG</span></span></a></li>
<li class="ltx_tocentry ltx_tocentry_subsection"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7.SS6" title="VII-F Multi-modal RAG ‣ VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref"><span class="ltx_text">VII-F</span> </span><span class="ltx_text ltx_font_italic">Multi-modal RAG</span></span></a></li>
</ol>
</li>
<li class="ltx_tocentry ltx_tocentry_section"><a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S8" title="VIII Conclusion ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_title"><span class="ltx_tag ltx_tag_ref">VIII </span><span class="ltx_text ltx_font_smallcaps">Conclusion</span></span></a></li>
</ol></nav>
</nav>
<div class="ltx_page_main">
<div class="ltx_page_content"><div class="section" id="target-section"><div id="license-tr">License: arXiv.org perpetual non-exclusive license</div><div id="watermark-tr">arXiv:2312.10997v5 [cs.CL] 27 Mar 2024</div></div>
<article class="ltx_document ltx_authors_1line">
<h1 class="ltx_title ltx_title_document">Retrieval-Augmented Generation for Large Language Models: A Survey</h1>
<div class="ltx_authors">
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yunfan Gao
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yun Xiong
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Xinyu Gao
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Kangxiang Jia
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Jinliu Pan
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yuxi Bi
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">College of Design and Innovation, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Yi Dai
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Jiawei Sun
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Meng Wang
</span><span class="ltx_author_notes">
<span class="ltx_contact ltx_role_affiliation">College of Design and Innovation, Tongji University
</span></span></span>
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Haofen Wang
</span><span class="ltx_author_notes">Corresponding Author.Email:<a class="ltx_ref ltx_url ltx_font_typewriter" href="[email protected]" title="">[email protected]</a>
<span class="ltx_contact ltx_role_affiliation">Shanghai Research Institute for Intelligent Autonomous Systems, Tongji University
</span>
<span class="ltx_contact ltx_role_affiliation">College of Design and Innovation, Tongji University
</span></span></span>
</div>
<div class="ltx_abstract">
<h6 class="ltx_title ltx_title_abstract">Abstract</h6>
<p class="ltx_p" id="id1.id1">Large Language Models (LLMs) showcase impressive capabilities but encounter challenges like hallucination, outdated knowledge, and non-transparent, untraceable reasoning processes. Retrieval-Augmented Generation (RAG) has emerged as a promising solution by incorporating knowledge from external databases. This enhances the accuracy and credibility of the generation, particularly for knowledge-intensive tasks, and allows for continuous knowledge updates and integration of domain-specific information. RAG synergistically merges LLMs’ intrinsic knowledge with the vast, dynamic repositories of external databases. This comprehensive review paper offers a detailed examination of the progression of RAG paradigms, encompassing the Naive RAG, the Advanced RAG, and the Modular RAG. It meticulously scrutinizes the tripartite foundation of RAG frameworks, which includes the retrieval, the generation and the augmentation techniques. The paper highlights the state-of-the-art technologies embedded in each of these critical components, providing a profound understanding of the advancements in RAG systems. Furthermore, this paper introduces up-to-date evaluation framework and benchmark. At the end, this article delineates the challenges currently faced and points out prospective avenues for research and development <span class="ltx_note ltx_role_footnote" id="footnote1"><sup class="ltx_note_mark">1</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">1</sup><span class="ltx_tag ltx_tag_note">1</span>Resources are available at <a class="ltx_ref ltx_url ltx_font_typewriter" href="https://github.com/Tongji-KGLLM/RAG-Survey" title="">https://github.com/Tongji-KGLLM/RAG-Survey</a> </span></span></span>.</p>
</div>
<div class="ltx_keywords">
<h6 class="ltx_title ltx_title_keywords">Index Terms: </h6>
Large language model, retrieval-augmented generation, natural language processing, information retrieval
</div>
<section class="ltx_section" id="S1">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">I </span><span class="ltx_text ltx_font_smallcaps" id="S1.1.1">Introduction</span>
</h2>
<div class="ltx_para" id="S1.p1">
<p class="ltx_p" id="S1.p1.1">Large language models (LLMs) have achieved remarkable success, though they still face significant limitations, especially in domain-specific or knowledge-intensive tasks <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib1" title="">1</a>]</cite>, notably producing “hallucinations” <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib2" title="">2</a>]</cite> when handling queries beyond their training data or requiring current information. To overcome challenges, Retrieval-Augmented Generation (RAG) enhances LLMs by retrieving relevant document chunks from external knowledge base through semantic similarity calculation. By referencing external knowledge, RAG effectively reduces the problem of generating factually incorrect content. Its integration into LLMs has resulted in widespread adoption, establishing RAG as a key technology in advancing chatbots and enhancing the suitability of LLMs for real-world applications.</p>
</div>
<figure class="ltx_figure" id="S1.F1"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="366" id="S1.F1.g1" src="extracted/5498883/images/rag_tech_tree.png" width="509"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 1: </span>Technology tree of RAG research. The stages of involving RAG mainly include pre-training, fine-tuning, and inference. With the emergence of LLMs, research on RAG initially focused on leveraging the powerful in context learning abilities of LLMs, primarily concentrating on the inference stage. Subsequent research has delved deeper, gradually integrating more with the fine-tuning of LLMs. Researchers have also been exploring ways to enhance language models in the pre-training stage through retrieval-augmented techniques.</figcaption>
</figure>
<div class="ltx_para" id="S1.p2">
<p class="ltx_p" id="S1.p2.1">RAG technology has rapidly developed in recent years, and the technology tree summarizing related research is shown in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S1.F1" title="Figure 1 ‣ I Introduction ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">1</span></a>. The development trajectory of RAG in the era of large models exhibits several distinct stage characteristics. Initially, RAG’s inception coincided with the rise of the Transformer architecture, focusing on enhancing language models by incorporating additional knowledge through Pre-Training Models (PTM). This early stage was characterized by foundational work aimed at refining pre-training techniques<cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib3" title="">3</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib4" title="">4</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib5" title="">5</a>]</cite>.The subsequent arrival of ChatGPT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib6" title="">6</a>]</cite> marked a pivotal moment, with LLM demonstrating powerful in context learning (ICL) capabilities. RAG research shifted towards providing better information for LLMs to answer more complex and knowledge-intensive tasks during the inference stage, leading to rapid development in RAG studies. As research progressed, the enhancement of RAG was no longer limited to the inference stage but began to incorporate more with LLM fine-tuning techniques.
</p>
</div>
<div class="ltx_para" id="S1.p3">
<p class="ltx_p" id="S1.p3.1">The burgeoning field of RAG has experienced swift growth, yet it has not been accompanied by a systematic synthesis that could clarify its broader trajectory. This survey endeavors to fill this gap by mapping out the RAG process and charting its evolution and anticipated future paths, with a focus on the integration of RAG within LLMs. This paper considers both technical paradigms and research methods, summarizing three main research paradigms from over 100 RAG studies, and analyzing key technologies in the core stages of “Retrieval,” “Generation,” and “Augmentation.” On the other hand, current research tends to focus more on methods, lacking analysis and summarization of how to evaluate RAG. This paper comprehensively reviews the downstream tasks, datasets, benchmarks, and evaluation methods applicable to RAG. Overall, this paper sets out to meticulously compile and categorize the foundational technical concepts, historical progression, and the spectrum of RAG methodologies and applications that have emerged post-LLMs. It is designed to equip readers and professionals with a detailed and structured understanding of both large models and RAG. It aims to illuminate the evolution of retrieval augmentation techniques, assess the strengths and weaknesses of various approaches in their respective contexts, and speculate on upcoming trends and innovations.</p>
</div>
<div class="ltx_para" id="S1.p4">
<p class="ltx_p" id="S1.p4.1">Our contributions are as follows:
</p>
<ul class="ltx_itemize" id="S1.I1">
<li class="ltx_item" id="S1.I1.i1" style="list-style-type:none;">
<span class="ltx_tag ltx_tag_item">•</span>
<div class="ltx_para" id="S1.I1.i1.p1">
<p class="ltx_p" id="S1.I1.i1.p1.1">In this survey, we present a thorough and systematic review of the state-of-the-art RAG methods, delineating its evolution through paradigms including naive RAG, advanced RAG, and modular RAG. This review contextualizes the broader scope of RAG research within the landscape of LLMs.</p>
</div>
</li>
<li class="ltx_item" id="S1.I1.i2" style="list-style-type:none;">
<span class="ltx_tag ltx_tag_item">•</span>
<div class="ltx_para" id="S1.I1.i2.p1">
<p class="ltx_p" id="S1.I1.i2.p1.1">We identify and discuss the central technologies integral to the RAG process, specifically focusing on the aspects of “Retrieval”, “Generation” and “Augmentation”, and delve into their synergies, elucidating how these components intricately collaborate to form a cohesive and effective RAG framework.</p>
</div>
</li>
<li class="ltx_item" id="S1.I1.i3" style="list-style-type:none;">
<span class="ltx_tag ltx_tag_item">•</span>
<div class="ltx_para" id="S1.I1.i3.p1">
<p class="ltx_p" id="S1.I1.i3.p1.1">We have summarized the current assessment methods of RAG, covering 26 tasks, nearly 50 datasets, outlining the evaluation objectives and metrics, as well as the current evaluation benchmarks and tools. Additionally, we anticipate future directions for RAG, emphasizing potential enhancements to tackle current challenges.</p>
</div>
</li>
</ul>
</div>
<div class="ltx_para" id="S1.p5">
<p class="ltx_p" id="S1.p5.1">The paper unfolds as follows: Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2" title="II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">II</span></a> introduces the main concept and current paradigms of RAG. The following three sections explore core components—“Retrieval”, “Generation” and “Augmentation”, respectively.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S3" title="III Retrieval ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">III</span></a> focuses on optimization methods in retrieval,including indexing, query and embedding optimization.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S4" title="IV Generation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">IV</span></a> concentrates on post-retrieval process and LLM fine-tuning in generation.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S5" title="V Augmentation process in RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">V</span></a> analyzes the three augmentation processes.
Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S6" title="VI Task and Evaluation ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">VI</span></a> focuses on RAG’s downstream tasks and evaluation system. Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S7" title="VII Discussion and Future Prospects ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">VII</span></a> mainly discusses the challenges that RAG currently faces and its future development directions. At last, the paper concludes in Section <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S8" title="VIII Conclusion ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">VIII</span></a>.</p>
</div>
</section>
<section class="ltx_section" id="S2">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">II </span><span class="ltx_text ltx_font_smallcaps" id="S2.1.1">Overview of RAG </span>
</h2>
<div class="ltx_para" id="S2.p1">
<p class="ltx_p" id="S2.p1.1">A typical application of RAG is illustrated in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.F2" title="Figure 2 ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">2</span></a>. Here, a user poses a question to ChatGPT about a recent, widely discussed news. Given ChatGPT’s reliance on pre-training data, it initially lacks the capacity to provide updates on recent developments. RAG bridges this information gap by sourcing and incorporating knowledge from external databases. In this case, it gathers relevant news articles related to the user’s query. These articles, combined with the original question, form a comprehensive prompt that empowers LLMs to generate a well-informed answer.</p>
</div>
<figure class="ltx_figure" id="S2.F2"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="301" id="S2.F2.g1" src="extracted/5498883/images/RAG_case.png" width="509"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 2: </span>A representative instance of the RAG process applied to question answering. It mainly consists of 3 steps. 1) Indexing. Documents are split into chunks, encoded into vectors, and stored in a vector database. 2) Retrieval. Retrieve the Top k chunks most relevant to the question based on semantic similarity. 3) Generation. Input the original question and the retrieved chunks together into LLM to generate the final answer.</figcaption>
</figure>
<div class="ltx_para" id="S2.p2">
<p class="ltx_p" id="S2.p2.1">The RAG research paradigm is continuously evolving, and we categorize it into three stages: Naive RAG, Advanced RAG, and Modular RAG, as showed in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.F3" title="Figure 3 ‣ II-B Advanced RAG ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">3</span></a>. Despite RAG method are cost-effective and surpass the performance of the native LLM, they also exhibit several limitations. The development of Advanced RAG and Modular RAG is a response to these specific shortcomings in Naive RAG.</p>
</div>
<section class="ltx_subsection" id="S2.SS1">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS1.5.1.1">II-A</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS1.6.2">Naive RAG</span>
</h3>
<div class="ltx_para" id="S2.SS1.p1">
<p class="ltx_p" id="S2.SS1.p1.1">The Naive RAG research paradigm represents the earliest methodology, which gained prominence shortly after the widespread adoption of ChatGPT. The Naive RAG follows a traditional process that includes indexing, retrieval, and generation, which is also characterized as a “Retrieve-Read” framework <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>]</cite>.</p>
</div>
<div class="ltx_para" id="S2.SS1.p2">
<p class="ltx_p" id="S2.SS1.p2.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p2.1.1">Indexing</em> starts with the cleaning and extraction of raw data in diverse formats like PDF, HTML, Word, and Markdown, which is then converted into a uniform plain text format. To accommodate the context limitations of language models, text is segmented into smaller, digestible chunks. Chunks are then encoded into vector representations using an embedding model and stored in vector database. This step is crucial for enabling efficient similarity searches in the subsequent retrieval phase.</p>
</div>
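<div class="ltx_para">
<p class="ltx_p">As a minimal illustration of this indexing step, the Python sketch below splits plain text into overlapping chunks, embeds them, and keeps the vectors in a NumPy array standing in for a vector database. The chunk size, the overlap, and the sentence-transformers model name are illustrative assumptions rather than choices prescribed by the survey.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Indexing sketch: split cleaned text into overlapping chunks, encode the
# chunks with an embedding model, and keep the unit-normalized vectors in a
# NumPy array standing in for a vector database. All parameters are examples.
import numpy as np
from sentence_transformers import SentenceTransformer

def chunk_text(text, chunk_size=500, overlap=50):
    """Split plain text into fixed-size, overlapping character chunks."""
    step = chunk_size - overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

def build_index(documents, model):
    """Chunk every document and embed all chunks with the same model."""
    chunks = [c for doc in documents for c in chunk_text(doc)]
    vectors = model.encode(chunks)                       # shape: (num_chunks, dim)
    vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    return chunks, vectors

embedder = SentenceTransformer("all-MiniLM-L6-v2")       # example embedding model
chunks, index = build_index(["plain text of document one",
                             "plain text of document two"], embedder)
</pre>
</div>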
<div class="ltx_para" id="S2.SS1.p3">
<p class="ltx_p" id="S2.SS1.p3.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p3.1.1">Retrieval</em>. Upon receipt of a user query, the RAG system employs the same encoding model utilized during the indexing phase to transform the query into a vector representation. It then computes the similarity scores between the query vector and the vector of chunks within the indexed corpus. The system prioritizes and retrieves the top K chunks that demonstrate the greatest similarity to the query. These chunks are subsequently used as the expanded context in prompt.</p>
</div>
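<div class="ltx_para">
<p class="ltx_p">A corresponding retrieval sketch, reusing the embedder and the in-memory index from the indexing sketch above: the query is encoded with the same model and the top-K chunks by cosine similarity are returned. The value of K and the dot-product scoring over unit-normalized vectors are illustrative choices.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Retrieval sketch: encode the query with the same embedding model used at
# indexing time and return the top-K chunks by cosine similarity.
import numpy as np

def retrieve(query, chunks, index, model, k=3):
    q = model.encode([query])
    q = q / np.linalg.norm(q, axis=1, keepdims=True)
    scores = index @ q[0]                  # cosine similarity (vectors are unit norm)
    top = np.argsort(scores)[::-1][:k]     # indices of the K most similar chunks
    return [chunks[i] for i in top]

top_chunks = retrieve("What does document one describe?", chunks, index, embedder)
</pre>
</div>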
<div class="ltx_para" id="S2.SS1.p4">
<p class="ltx_p" id="S2.SS1.p4.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p4.1.1">Generation</em>. The posed query and selected documents are synthesized into a coherent prompt to which a large language model is tasked with formulating a response. The model’s approach to answering may vary depending on task-specific criteria, allowing it to either draw upon its inherent parametric knowledge or restrict its responses to the information contained within the provided documents. In cases of ongoing dialogues, any existing conversational history can be integrated into the prompt, enabling the model to engage in multi-turn dialogue interactions effectively.</p>
</div>
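<div class="ltx_para">
<p class="ltx_p">A sketch of prompt assembly for this generation step. The prompt template is an assumption, and <span class="ltx_text ltx_font_typewriter">llm</span> stands for any callable that maps a prompt string to a completion string; the survey does not prescribe a particular model API.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Generation sketch: combine the question and the retrieved chunks into one
# prompt and hand it to a language model. `llm` is a placeholder callable that
# maps a prompt string to a completion string; the template is illustrative.
def generate_answer(llm, query, retrieved_chunks):
    context = "\n\n".join(retrieved_chunks)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )
    return llm(prompt)
</pre>
</div>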
<div class="ltx_para" id="S2.SS1.p5">
<p class="ltx_p" id="S2.SS1.p5.1">However, Naive RAG encounters notable drawbacks:</p>
</div>
<div class="ltx_para" id="S2.SS1.p6">
<p class="ltx_p" id="S2.SS1.p6.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p6.1.1">Retrieval Challenges</em>. The retrieval phase often struggles with precision and recall, leading to the selection of misaligned or irrelevant chunks, and the missing of crucial information.</p>
</div>
<div class="ltx_para" id="S2.SS1.p7">
<p class="ltx_p" id="S2.SS1.p7.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p7.1.1">Generation Difficulties</em>. In generating responses, the model may face the issue of hallucination, where it produces content not supported by the retrieved context. This phase can also suffer from irrelevance, toxicity, or bias in the outputs, detracting from the quality and reliability of the responses.</p>
</div>
<div class="ltx_para" id="S2.SS1.p8">
<p class="ltx_p" id="S2.SS1.p8.1"><em class="ltx_emph ltx_font_italic" id="S2.SS1.p8.1.1">Augmentation Hurdles</em>. Integrating retrieved information with the different task can be challenging, sometimes resulting in disjointed or incoherent outputs. The process may also encounter redundancy when similar information is retrieved from multiple sources, leading to repetitive responses. Determining the significance and relevance of various passages and ensuring stylistic and tonal consistency add further complexity. Facing complex issues, a single retrieval based on the original query may not suffice to acquire adequate context information.</p>
</div>
<div class="ltx_para" id="S2.SS1.p9">
<p class="ltx_p" id="S2.SS1.p9.1">Moreover, there’s a concern that generation models might overly rely on augmented information, leading to outputs that simply echo retrieved content without adding insightful or synthesized information.</p>
</div>
</section>
<section class="ltx_subsection" id="S2.SS2">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS2.5.1.1">II-B</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS2.6.2">Advanced RAG</span>
</h3>
<div class="ltx_para" id="S2.SS2.p1">
<p class="ltx_p" id="S2.SS2.p1.1">Advanced RAG introduces specific improvements to overcome the limitations of Naive RAG. Focusing on enhancing retrieval quality, it employs pre-retrieval and post-retrieval strategies. To tackle the indexing issues, Advanced RAG refines its indexing techniques through the use of a sliding window approach, fine-grained segmentation, and the incorporation of metadata. Additionally, it incorporates several optimization methods to streamline the retrieval process<cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib8" title="">8</a>]</cite>.
</p>
</div>
<div class="ltx_para" id="S2.SS2.p2">
<p class="ltx_p" id="S2.SS2.p2.1"><em class="ltx_emph ltx_font_italic" id="S2.SS2.p2.1.1">Pre-retrieval process</em>. In this stage, the primary focus is on optimizing the indexing structure and the original query. The goal of optimizing indexing is to enhance the quality of the content being indexed. This involves strategies: enhancing data granularity, optimizing index structures, adding metadata, alignment optimization, and mixed retrieval. While the goal of query optimization is to make the user’s original question clearer and more suitable for the retrieval task. Common methods include query rewriting query transformation, query expansion and other techniques <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib9" title="">9</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib10" title="">10</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib11" title="">11</a>]</cite>.</p>
</div>
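<div class="ltx_para">
<p class="ltx_p">As one hedged illustration of query optimization, the sketch below expands the user question into paraphrased variants with an LLM and merges the chunks retrieved for each variant, reusing the retrieve helper from the earlier sketch. The prompt wording and the simple order-preserving deduplication are assumptions, not a method defined in the cited works.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Query-expansion sketch: ask an LLM for paraphrases of the question, retrieve
# chunks for each variant, and merge the results. `llm` is a placeholder
# callable from prompt to text; retrieve() is the function sketched earlier.
def expand_query(llm, query, n=3):
    prompt = (
        f"Rewrite the following question in {n} different ways, one per line, "
        f"keeping its meaning unchanged:\n{query}"
    )
    variants = [line.strip() for line in llm(prompt).splitlines() if line.strip()]
    return [query] + variants[:n]

def retrieve_with_expansion(llm, query, chunks, index, model, k=3):
    merged, seen = [], set()
    for q in expand_query(llm, query):
        for chunk in retrieve(q, chunks, index, model, k):
            if chunk not in seen:          # keep each chunk once, in rank order
                seen.add(chunk)
                merged.append(chunk)
    return merged
</pre>
</div>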
<div class="ltx_para" id="S2.SS2.p3">
<p class="ltx_p" id="S2.SS2.p3.1"><em class="ltx_emph ltx_font_italic" id="S2.SS2.p3.1.1">Post-Retrieval Process</em>. Once relevant context is retrieved, it’s crucial to integrate it effectively with the query. The main methods in post-retrieval process include rerank chunks and context compressing. Re-ranking the retrieved information to relocate the most relevant content to the edges of the prompt is a key strategy. This concept has been implemented in frameworks such as LlamaIndex<span class="ltx_note ltx_role_footnote" id="footnote2"><sup class="ltx_note_mark">2</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">2</sup><span class="ltx_tag ltx_tag_note">2</span><a class="ltx_ref ltx_url ltx_font_typewriter" href="https://www.llamaindex.ai" title="">https://www.llamaindex.ai</a></span></span></span>, LangChain<span class="ltx_note ltx_role_footnote" id="footnote3"><sup class="ltx_note_mark">3</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">3</sup><span class="ltx_tag ltx_tag_note">3</span><a class="ltx_ref ltx_url ltx_font_typewriter" href="https://www.langchain.com/" title="">https://www.langchain.com/</a></span></span></span>, and HayStack <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib12" title="">12</a>]</cite>. Feeding all relevant documents directly into LLMs can lead to information overload, diluting the focus on key details with irrelevant content.To mitigate this, post-retrieval efforts concentrate on selecting the essential information, emphasizing critical sections, and shortening the context to be processed.
</p>
</div>
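<div class="ltx_para">
<p class="ltx_p">The sketch below illustrates the reranking side of post-retrieval processing: a cross-encoder rescores query-chunk pairs and only the highest-scoring chunks are kept, shortening the context handed to the LLM. The specific cross-encoder model and the fixed cut-off are illustrative assumptions rather than choices made by the frameworks cited above.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Reranking sketch: rescore each (query, chunk) pair with a cross-encoder and
# keep only the highest-scoring chunks. Model name and cut-off are examples.
import numpy as np
from sentence_transformers import CrossEncoder

def rerank(query, candidate_chunks, keep=3):
    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    scores = reranker.predict([(query, c) for c in candidate_chunks])
    order = np.argsort(scores)[::-1][:keep]
    return [candidate_chunks[i] for i in order]
</pre>
</div>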
<figure class="ltx_figure" id="S2.F3"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="292" id="S2.F3.g1" src="extracted/5498883/images/RAG_FrameCompre_eng.png" width="480"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 3: </span>Comparison between the three paradigms of RAG. (Left) Naive RAG mainly consists of three parts: indexing, retrieval and generation. (Middle) Advanced RAG proposes multiple optimization strategies around pre-retrieval and post-retrieval, with a process similar to the Naive RAG, still following a chain-like structure. (Right) Modular RAG inherits and develops from the previous paradigm, showcasing greater flexibility overall. This is evident in the introduction of multiple specific functional modules and the replacement of existing modules. The overall process is not limited to sequential retrieval and generation; it includes methods such as iterative and adaptive retrieval.</figcaption>
</figure>
</section>
<section class="ltx_subsection" id="S2.SS3">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS3.5.1.1">II-C</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS3.6.2">Modular RAG</span>
</h3>
<div class="ltx_para" id="S2.SS3.p1">
<p class="ltx_p" id="S2.SS3.p1.1">The modular RAG architecture advances beyond the former two RAG paradigms, offering enhanced adaptability and versatility. It incorporates diverse strategies for improving its components, such as adding a search module for similarity searches and refining the retriever through fine-tuning. Innovations like restructured RAG modules <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite> and rearranged RAG pipelines <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib14" title="">14</a>]</cite> have been introduced to tackle specific challenges. The shift towards a modular RAG approach is becoming prevalent, supporting both sequential processing and integrated end-to-end training across its components. Despite its distinctiveness, Modular RAG builds upon the foundational principles of Advanced and Naive RAG, illustrating a progression and refinement within the RAG family.</p>
</div>
<section class="ltx_subsubsection" id="S2.SS3.SSS1">
<h4 class="ltx_title ltx_title_subsubsection">
<span class="ltx_tag ltx_tag_subsubsection"><span class="ltx_text" id="S2.SS3.SSS1.5.1.1">II-C</span>1 </span>New Modules</h4>
<div class="ltx_para" id="S2.SS3.SSS1.p1">
<p class="ltx_p" id="S2.SS3.SSS1.p1.1">The Modular RAG framework introduces additional specialized components to enhance retrieval and processing capabilities. The Search module adapts to specific scenarios, enabling direct searches across various data sources like search engines, databases, and knowledge graphs, using LLM-generated code and query languages <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib15" title="">15</a>]</cite>. RAG-Fusion addresses traditional search limitations by employing a multi-query strategy that expands user queries into diverse perspectives, utilizing parallel vector searches and intelligent re-ranking to uncover both explicit and transformative knowledge <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib16" title="">16</a>]</cite>. The Memory module leverages the LLM’s memory to guide retrieval, creating an unbounded memory pool that aligns the text more closely with data distribution through iterative self-enhancement <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib17" title="">17</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib18" title="">18</a>]</cite>. Routing in the RAG system navigates through diverse data sources, selecting the optimal pathway for a query, whether it involves summarization, specific database searches, or merging different information streams <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib19" title="">19</a>]</cite>. The Predict module aims to reduce redundancy and noise by generating context directly through the LLM, ensuring relevance and accuracy <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite>. Lastly, the Task Adapter module tailors RAG to various downstream tasks, automating prompt retrieval for zero-shot inputs and creating task-specific retrievers through few-shot query generation <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib20" title="">20</a>, <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib21" title="">21</a>]</cite> .This comprehensive approach not only streamlines the retrieval process but also significantly improves the quality and relevance of the information retrieved, catering to a wide array of tasks and queries with enhanced precision and flexibility.</p>
</div>
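<div class="ltx_para">
<p class="ltx_p">To make the multi-query idea behind RAG-Fusion concrete, the sketch below fuses the rankings returned for several reformulations of a question using reciprocal rank fusion. It is a minimal illustration rather than the reference implementation of <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib16" title="">16</a>]</cite>; <span class="ltx_text ltx_font_typewriter">expand_query</span> and <span class="ltx_text ltx_font_typewriter">vector_search</span> are assumed callables standing in for an LLM-based query expander and a vector store.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Minimal sketch of a RAG-Fusion-style multi-query retrieval step (illustrative only).
def reciprocal_rank_fusion(result_lists, k=60):
    """Merge several ranked lists of document ids into one fused ranking."""
    scores = {}
    for results in result_lists:
        for rank, doc_id in enumerate(results):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank + 1)
    return sorted(scores, key=scores.get, reverse=True)

def rag_fusion_retrieve(question, expand_query, vector_search, n_queries=4, top_k=5):
    # 1. Expand the user question into several reformulations (assumed LLM call).
    queries = [question] + expand_query(question, n=n_queries - 1)
    # 2. Run a vector search for every reformulation (assumed retriever API).
    ranked_lists = [vector_search(q, top_k=top_k) for q in queries]
    # 3. Re-rank with reciprocal rank fusion and keep the top results.
    return reciprocal_rank_fusion(ranked_lists)[:top_k]
</pre>
</div>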
</section>
<section class="ltx_subsubsection" id="S2.SS3.SSS2">
<h4 class="ltx_title ltx_title_subsubsection">
<span class="ltx_tag ltx_tag_subsubsection"><span class="ltx_text" id="S2.SS3.SSS2.5.1.1">II-C</span>2 </span>New Patterns</h4>
<div class="ltx_para" id="S2.SS3.SSS2.p1">
<p class="ltx_p" id="S2.SS3.SSS2.p1.1">Modular RAG offers remarkable adaptability by allowing module substitution or reconfiguration to address specific challenges. This goes beyond the fixed structures of Naive and Advanced RAG, characterized by a simple “Retrieve” and “Read” mechanism. Moreover, Modular RAG expands this flexibility by integrating new modules or adjusting interaction flow among existing ones, enhancing its applicability across different tasks.</p>
</div>
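<div class="ltx_para">
<p class="ltx_p">As a purely illustrative sketch (not an API defined in any of the surveyed works), the snippet below treats each RAG module as an interchangeable callable over a shared state, so a pipeline can be reconfigured, or a new module slotted in, by editing a list rather than rewriting the system.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Hypothetical illustration of module substitution in a Modular RAG pipeline.
def run_pipeline(question, modules):
    """Apply the configured modules in order over a shared state dict."""
    state = {"question": question, "query": question, "docs": [], "answer": None}
    for module in modules:
        state = module(state)
    return state["answer"]

# Example configurations, where each stage is a user-supplied function that
# takes and returns the shared state dict:
#   naive_rag    = [retrieve, read]
#   advanced_rag = [rewrite, retrieve, rerank, read]
</pre>
</div>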
<div class="ltx_para" id="S2.SS3.SSS2.p2">
<p class="ltx_p" id="S2.SS3.SSS2.p2.1">Innovations such as the Rewrite-Retrieve-Read <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>]</cite>model leverage the LLM’s capabilities to refine retrieval queries through a rewriting module and a LM-feedback mechanism to update rewriting model., improving task performance. Similarly, approaches like Generate-Read <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite> replace traditional retrieval with LLM-generated content, while Recite-Read <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib22" title="">22</a>]</cite> emphasizes retrieval from model weights, enhancing the model’s ability to handle knowledge-intensive tasks. Hybrid retrieval strategies integrate keyword, semantic, and vector searches to cater to diverse queries. Additionally, employing sub-queries and hypothetical document embeddings (HyDE) <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib11" title="">11</a>]</cite> seeks to improve retrieval relevance by focusing on embedding similarities between generated answers and real documents.</p>
</div>
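<div class="ltx_para">
<p class="ltx_p">The sketch below illustrates the control flow of HyDE-style retrieval <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib11" title="">11</a>]</cite> under stated assumptions: <span class="ltx_text ltx_font_typewriter">generate_hypothetical_doc</span>, <span class="ltx_text ltx_font_typewriter">embed</span>, and <span class="ltx_text ltx_font_typewriter">index</span> are assumed stand-ins for an LLM, an embedding model, and a vector index. The point is only that the search uses the embedding of a hypothetical answer rather than of the raw question.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Minimal sketch of HyDE-style retrieval (illustrative only).
def hyde_retrieve(question, generate_hypothetical_doc, embed, index, top_k=5):
    hypothetical_doc = generate_hypothetical_doc(question)  # LLM drafts a plausible answer
    query_vector = embed(hypothetical_doc)                   # embed the draft, not the question
    return index.search(query_vector, top_k=top_k)           # nearest real documents
</pre>
</div>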
<div class="ltx_para" id="S2.SS3.SSS2.p3">
<p class="ltx_p" id="S2.SS3.SSS2.p3.1">Adjustments in module arrangement and interaction, such as the Demonstrate-Search-Predict (DSP) <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib23" title="">23</a>]</cite> framework and the iterative Retrieve-Read-Retrieve-Read flow of ITER-RETGEN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib14" title="">14</a>]</cite>, showcase the dynamic use of module outputs to bolster another module’s functionality, illustrating a sophisticated understanding of enhancing module synergy. The flexible orchestration of Modular RAG Flow showcases the benefits of adaptive retrieval through techniques such as FLARE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib24" title="">24</a>]</cite> and Self-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib25" title="">25</a>]</cite>. This approach transcends the fixed RAG retrieval process by evaluating the necessity of retrieval based on different scenarios. Another benefit of a flexible architecture is that the RAG system can more easily integrate with other technologies (such as fine-tuning or reinforcement learning) <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib26" title="">26</a>]</cite>. For example, this can involve fine-tuning the retriever for better retrieval results, fine-tuning the generator for more personalized outputs, or engaging in collaborative fine-tuning <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib27" title="">27</a>]</cite>.</p>
</div>
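<div class="ltx_para">
<p class="ltx_p">The following sketch captures the spirit of adaptive retrieval in a FLARE-like loop <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib24" title="">24</a>]</cite>: retrieval is triggered only when the generator is not confident about its next sentence. It is a simplified, assumption-laden illustration (the actual method operates on token probabilities); <span class="ltx_text ltx_font_typewriter">generate_next_sentence</span>, <span class="ltx_text ltx_font_typewriter">confidence</span>, and <span class="ltx_text ltx_font_typewriter">retrieve</span> are assumed callables.</p>
<pre class="ltx_verbatim ltx_font_typewriter">
# Minimal sketch of an adaptive (retrieve-only-when-needed) generation loop.
def adaptive_generate(question, generate_next_sentence, confidence, retrieve,
                      max_sentences=10, threshold=0.8):
    context, answer = [], []
    for _ in range(max_sentences):
        draft = generate_next_sentence(question, context, answer)
        if not draft:                          # generator signals completion
            break
        if confidence(draft) >= threshold:     # confident enough: keep the sentence
            answer.append(draft)
            continue
        context.extend(retrieve(draft))        # otherwise retrieve with the draft as query
        regenerated = generate_next_sentence(question, context, answer)
        answer.append(regenerated if regenerated else draft)
    return " ".join(answer)
</pre>
</div>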
</section>
</section>
<section class="ltx_subsection" id="S2.SS4">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection"><span class="ltx_text" id="S2.SS4.5.1.1">II-D</span> </span><span class="ltx_text ltx_font_italic" id="S2.SS4.6.2">RAG vs Fine-tuning</span>
</h3>
<div class="ltx_para" id="S2.SS4.p1">
<p class="ltx_p" id="S2.SS4.p1.1">The augmentation of LLMs has attracted considerable attention due to their growing prevalence. Among the optimization methods for LLMs, RAG is often compared with Fine-tuning (FT) and prompt engineering. Each method has distinct characteristics as illustrated in Figure <a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#S2.F4" title="Figure 4 ‣ II-D RAG vs Fine-tuning ‣ II Overview of RAG ‣ Retrieval-Augmented Generation for Large Language Models: A Survey"><span class="ltx_text ltx_ref_tag">4</span></a>. We used a quadrant chart to illustrate the differences among three methods in two dimensions: external knowledge requirements and model adaption requirements. Prompt engineering leverages a model’s inherent capabilities with minimum necessity for external knowledge and model adaption. RAG can be likened to providing a model with a tailored textbook for information retrieval, ideal for precise information retrieval tasks. In contrast, FT is comparable to a student internalizing knowledge over time, suitable for scenarios requiring replication of specific structures, styles, or formats.</p>
</div>
<div class="ltx_para" id="S2.SS4.p2">
<p class="ltx_p" id="S2.SS4.p2.1">RAG excels in dynamic environments by offering real-time knowledge updates and effective utilization of external knowledge sources with high interpretability. However, it comes with higher latency and ethical considerations regarding data retrieval. On the other hand, FT is more static, requiring retraining for updates but enabling deep customization of the model’s behavior and style. It demands significant computational resources for dataset preparation and training, and while it can reduce hallucinations, it may face challenges with unfamiliar data.</p>
</div>
<div class="ltx_para" id="S2.SS4.p3">
<p class="ltx_p" id="S2.SS4.p3.1">In multiple evaluations of their performance on various knowledge-intensive tasks across different topics, <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib28" title="">28</a>]</cite> revealed that while unsupervised fine-tuning shows some improvement, RAG consistently outperforms it, for both existing knowledge encountered during training and entirely new knowledge. Additionally, it was found that LLMs struggle to learn new factual information through unsupervised fine-tuning. The choice between RAG and FT depends on the specific needs for data dynamics, customization, and computational capabilities in the application context. RAG and FT are not mutually exclusive and can complement each other, enhancing a model’s capabilities at different levels. In some instances, their combined use may lead to optimal performance. The optimization process involving RAG and FT may require multiple iterations to achieve satisfactory results.</p>
</div>
<figure class="ltx_figure" id="S2.F4"><img alt="Refer to caption" class="ltx_graphics ltx_centering ltx_img_landscape" height="280" id="S2.F4.g1" src="extracted/5498883/images/rag_FT.png" width="479"/>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 4: </span>RAG compared with other model optimization methods in the aspects of “External Knowledge Required” and “Model Adaption Required”. Prompt Engineering requires low modifications to the model and external knowledge, focusing on harnessing the capabilities of LLMs themselves. Fine-tuning, on the other hand, involves further training the model. In the early stages of RAG (Naive RAG), there is a low demand for model modifications. As research progresses, Modular RAG has become more integrated with fine-tuning techniques.</figcaption>
</figure>
<figure class="ltx_table" id="S2.T1">
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_table">TABLE I: </span>Summary of RAG methods</figcaption>
<div class="ltx_inline-block ltx_align_center ltx_transformed_outer" id="S2.T1.1" style="width:488.0pt;height:1056.3pt;vertical-align:-0.8pt;"><span class="ltx_transformed_inner" style="transform:translate(-61.0pt,131.9pt) scale(0.8,0.8) ;">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1">
<tr class="ltx_tr" id="S2.T1.1.1.1">
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.1">Method</td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.2">Retrieval Source</td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.3">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.3.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.3.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.3.1.1.1">Retrieval</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.3.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.3.1.2.1">Data Type</td>
</tr>
</table></td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.4">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.4.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.4.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.4.1.1.1">Retrieval</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.4.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.4.1.2.1">Granularity</td>
</tr>
</table></td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.5">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.5.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.5.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.5.1.1.1">Augmentation</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.5.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.5.1.2.1">Stage</td>
</tr>
</table></td>
<td class="ltx_td ltx_align_center ltx_border_tt" id="S2.T1.1.1.1.6">
<table class="ltx_tabular ltx_align_middle" id="S2.T1.1.1.1.6.1">
<tr class="ltx_tr" id="S2.T1.1.1.1.6.1.1">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.6.1.1.1">Retrieval</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.1.6.1.2">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.1.6.1.2.1">process</td>
</tr>
</table></td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.2">
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.1">CoG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib29" title="">29</a>]</cite>
</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.2">Wikipedia</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.3">Text</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.4">Phrase</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.5">Pre-training</td>
<td class="ltx_td ltx_align_center ltx_border_t" id="S2.T1.1.1.2.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.3">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.1">DenseX <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib30" title="">30</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.2">FactoidWiki</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.4">Proposition</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.3.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.4">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.1">EAR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib31" title="">31</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.4.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.5">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.1">UPRISE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib20" title="">20</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.5.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.6">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.1">RAST <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib32" title="">32</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.6.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.7">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.1">Self-Mem <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib17" title="">17</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.7.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.8">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.1">FLARE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib24" title="">24</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.2">Search Engine,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.8.6">Adaptive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.9">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.1">PGRA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib33" title="">33</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.9.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.10">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.1">FILCO <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib34" title="">34</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.10.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.11">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.1">RADA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib35" title="">35</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.11.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.12">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.1">Filter-rerank <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib36" title="">36</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.2">Synthesized dataset</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.12.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.13">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.1">R-GQA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib37" title="">37</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.4">Sentence Pair</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.13.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.14">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.1">LLM-R <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib38" title="">38</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.4">Sentence Pair</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.14.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.15">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.1">TIGER <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib39" title="">39</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.15.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.16">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.1">LM-Indexer <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib40" title="">40</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.16.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.17">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.1">BEQUE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib9" title="">9</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.17.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.18">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.1">CT-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib41" title="">41</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.2">Synthesized dataset</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.4">Item-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.18.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.19">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.1">Atlas <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib42" title="">42</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.2">Wikipedia, Common Crawl</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.19.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.20">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.1">RAVEN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib43" title="">43</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.20.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.21">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.1">RETRO++ <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib44" title="">44</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.2">Pre-training Corpus</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.21.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.22">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.1">INSTRUCTRETRO <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib45" title="">45</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.2">Pre-training corpus</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.22.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.23">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.1">RRR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib7" title="">7</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.2">Search Engine</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.23.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.24">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.1">RA-e2e <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib46" title="">46</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.24.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.25">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.1">PROMPTAGATOR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib21" title="">21</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.2">BEIR</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.25.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.26">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.1">AAR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib47" title="">47</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.2">MSMARCO,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.26.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.27">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.1">RA-DIT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib27" title="">27</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.2">Common Crawl,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.27.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.28">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.1">RAG-Robust <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib48" title="">48</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.28.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.29">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.1">RA-Long-Form <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib49" title="">49</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.29.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.30">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.1">CoN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib50" title="">50</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.30.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.31">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.1">Self-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib25" title="">25</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.31.6">Adaptive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.32">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.1">BGM <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib26" title="">26</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.32.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.33">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.1">CoQ <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib51" title="">51</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.33.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.34">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.1">Token-Elimination <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib52" title="">52</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.34.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.35">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.1">PaperQA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib53" title="">53</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.2">Arxiv,Online Database,PubMed</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.35.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.36">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.1">NoiseRAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib54" title="">54</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.2">FactoidWiki</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.36.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.37">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.1">IAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib55" title="">55</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.2">Search Engine,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.37.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.38">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.1">NoMIRACL <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib56" title="">56</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.38.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.39">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.1">ToC <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib57" title="">57</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.2">Search Engine,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.39.6">Recursive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.40">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.1">SKR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib58" title="">58</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.2">Dataset-base,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.40.6">Adaptive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.41">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.1">ITRG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib59" title="">59</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.41.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.42">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.1">RAG-LongContext <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib60" title="">60</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.42.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.43">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.1">ITER-RETGEN <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib14" title="">14</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.43.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.44">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.1">IRCoT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib61" title="">61</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.44.6">Recursive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.45">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.1">LLM-Knowledge-Boundary <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib62" title="">62</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.45.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.46">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.1">RAPTOR <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib63" title="">63</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.46.6">Recursive</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.47">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.1">RECITE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib22" title="">22</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.2">LLMs</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.47.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.48">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.1">ICRALM <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib64" title="">64</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.2">Pile,Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.48.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.49">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.1">Retrieve-and-Sample <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib65" title="">65</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.49.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.50">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.1">Zemi <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib66" title="">66</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.2">C4</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.50.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.51">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.1">CRAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib67" title="">67</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.2">Arxiv</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.51.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.52">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.1">1-PAGER <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib68" title="">68</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.52.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.53">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.1">PRCA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib69" title="">69</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.53.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.54">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.1">QLM-Doc-ranking <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib70" title="">70</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.54.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.55">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.1">Recomp <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib71" title="">71</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.55.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.56">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.1">DSP <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib23" title="">23</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.2">Wikipedia</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.56.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.57">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.1">RePLUG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib72" title="">72</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.2">Pile</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.57.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.58">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.1">ARM-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib73" title="">73</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.58.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.59">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.1">GenRead <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib13" title="">13</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.2">LLMs</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.4">Doc</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.59.6">Iterative</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.60">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.1">UniMS-RAG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib74" title="">74</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.3">Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.4">Multi</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.60.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.61">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.1">CREA-ICL <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib19" title="">19</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.3">Crosslingual,Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.4">Sentence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.61.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.62">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.1">PKG <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib75" title="">75</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.2">LLM</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.3">Tabular,Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.4">Chunk</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.62.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.63">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.1">SANTA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib76" title="">76</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.3">Code,Text</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.4">Item</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.5">Pre-training</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.63.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.64">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.1">SURGE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib77" title="">77</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.2">Freebase</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.4">Sub-Graph</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.64.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.65">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.1">MK-ToD <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib78" title="">78</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.4">Entity</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.65.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.66">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.1">Dual-Feedback-ToD <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib79" title="">79</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.4">Entity Sequence</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.5">Tuning</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.66.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.67">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.1">KnowledGPT <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib15" title="">15</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.2">Dataset-base</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.4">Triplet</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.67.6">Muti-time</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.68">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.1">FABULA <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib80" title="">80</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.2">Dataset-base,Graph</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.3">KG</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.4">Entity</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.5">Inference</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.68.6">Once</td>
</tr>
<tr class="ltx_tr" id="S2.T1.1.1.69">
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.69.1">HyKGE <cite class="ltx_cite ltx_citemacro_cite">[<a class="ltx_ref" href="https://arxiv.org/html/2312.10997v5#bib.bib81" title="">81</a>]</cite>
</td>
<td class="ltx_td ltx_align_center" id="S2.T1.1.1.69.2">CMeKG</td>