Skip to content

Commit dce2c4a

Browse files
committed
✨ 支持批量上传RAG资料
1 parent dff35d8 commit dce2c4a

File tree

9 files changed

+137
-14
lines changed

9 files changed

+137
-14
lines changed

Diff for: twelvet-server/twelvet-server-ai/src/main/java/com/twelvet/server/ai/mapper/AiDocMapper.java

+7
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ public interface AiDocMapper {
3737
*/
3838
int insertAiDoc(AiDoc aiDoc);
3939

40+
/**
 * Batch-inserts AI knowledge-base documents.
 * @param aiDocList the AI knowledge-base documents to insert
 * @return the result of the insert; NOTE(review): presumably the affected-row
 * count reported for the mapped statement - confirm against the mapper XML
 */
45+
int insertAiDocBatch(List<AiDoc> aiDocList);
46+
4047
/**
4148
* 修改AI知识库文档
4249
* @param aiDoc AI知识库文档

Diff for: twelvet-server/twelvet-server-ai/src/main/java/com/twelvet/server/ai/mapper/AiDocSliceMapper.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public interface AiDocSliceMapper {
4242
* @param aiDocSliceList AI知识库文档分片列表
4343
* @return 结果
4444
*/
45-
void insertAiDocSliceBatch(List<AiDocSlice> aiDocSliceList);
45+
int insertAiDocSliceBatch(List<AiDocSlice> aiDocSliceList);
4646

4747
/**
4848
* 修改AI知识库文档分片

Diff for: twelvet-server/twelvet-server-ai/src/main/java/com/twelvet/server/ai/mq/consumer/RAGMqTopicListener.java

+4
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ public RAGMqTopicListener(RAGMqTopicService ragMqTopicService) {
3636
public Consumer<Message<AiDocMqDTO>> addRAGDocChannel() {
3737
return message -> {
3838
log.info("处理添加RAG文档消息: {}", message);
39+
long startTime = System.nanoTime();
3940
ragMqTopicService.addRAGDocChannel(message);
41+
long endTime = System.nanoTime(); // 记录结束时间
42+
long duration = endTime - startTime; // 计算持续时间
43+
System.out.println("运行时间:" + duration / 1000000);
4044
};
4145
}
4246

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package com.twelvet.server.ai.mq.consumer.domain;
2+
3+
import com.twelvet.api.ai.domain.AiDoc;
4+
import com.twelvet.api.ai.domain.AiDocSlice;
5+
import io.swagger.v3.oas.annotations.media.Schema;
6+
import org.springframework.ai.document.Document;
7+
8+
import java.io.Serial;
9+
import java.io.Serializable;
10+
import java.util.List;
11+
12+
/**
13+
* <p>
14+
* 批量处理添加RAG文档消息VO
15+
* <p>
16+
*
17+
* @since 2025/1/10
18+
*/
19+
public class AiDocMqVO implements Serializable {
20+
21+
@Serial
22+
private static final long serialVersionUID = 1L;
23+
24+
/**
25+
* 知识库文档
26+
*/
27+
@Schema(description = "知识库文档")
28+
private AiDoc aiDoc;
29+
30+
/**
31+
* 知识库文档切片
32+
*/
33+
@Schema(description = "知识库文档切片")
34+
private List<AiDocSlice> aiDocSliceList;
35+
36+
/**
37+
* 知识库文档切片向量
38+
*/
39+
@Schema(description = "知识库文档切片向量")
40+
private List<Document> documentList;
41+
42+
public AiDoc getAiDoc() {
43+
return aiDoc;
44+
}
45+
46+
public void setAiDoc(AiDoc aiDoc) {
47+
this.aiDoc = aiDoc;
48+
}
49+
50+
public List<AiDocSlice> getAiDocSliceList() {
51+
return aiDocSliceList;
52+
}
53+
54+
public void setAiDocSliceList(List<AiDocSlice> aiDocSliceList) {
55+
this.aiDocSliceList = aiDocSliceList;
56+
}
57+
58+
public List<Document> getDocumentList() {
59+
return documentList;
60+
}
61+
62+
public void setDocumentList(List<Document> documentList) {
63+
this.documentList = documentList;
64+
}
65+
66+
@Override
67+
public String toString() {
68+
return "AiDocMqVO{" + "aiDoc=" + aiDoc + ", aiDocSliceList=" + aiDocSliceList + ", documentList=" + documentList
69+
+ '}';
70+
}
71+
72+
}

Diff for: twelvet-server/twelvet-server-ai/src/main/java/com/twelvet/server/ai/mq/consumer/service/impl/RAGMqTopicServiceImpl.java

+39-9
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.twelvet.framework.security.utils.SecurityUtils;
1111
import com.twelvet.server.ai.mapper.AiDocMapper;
1212
import com.twelvet.server.ai.mapper.AiDocSliceMapper;
13+
import com.twelvet.server.ai.mq.consumer.domain.AiDocMqVO;
1314
import com.twelvet.server.ai.mq.consumer.domain.dto.AiDocMqDTO;
1415
import com.twelvet.server.ai.mq.consumer.service.RAGMqTopicService;
1516
import org.slf4j.Logger;
@@ -138,8 +139,7 @@ else if (RAGEnums.DocSourceTypeEnums.UPLOAD.equals(sourceType)) { // 处理上
138139

139140
List<AiDocDTO.FileDTO> fileList = aiDocMqDTO.getFileList();
140141

141-
List<AiDocSlice> docSliceList = new ArrayList<>();
142-
List<Document> docList = new ArrayList<>();
142+
List<AiDocMqVO> aiDocMqVOList = new ArrayList<>();
143143
// 针对文件进行doc插入
144144
for (AiDocDTO.FileDTO file : fileList) {
145145
String fileName = file.getFileName();
@@ -148,9 +148,13 @@ else if (RAGEnums.DocSourceTypeEnums.UPLOAD.equals(sourceType)) { // 处理上
148148
TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(fileUrl);
149149
List<Document> documents = tikaDocumentReader.get();
150150
if (CollectionUtil.isEmpty(documents)) {
151-
throw new TWTException("上传文件识别为空,空数据");
151+
throw new TWTException("上传文件识别为空数据");
152152
}
153153

154+
AiDocMqVO aiDocMqVO = new AiDocMqVO();
155+
List<AiDocSlice> docSliceList = new ArrayList<>();
156+
List<Document> docList = new ArrayList<>();
157+
154158
AiDoc aiDoc = new AiDoc();
155159
aiDoc.setDocName(fileName);
156160
aiDoc.setKnowledgeId(knowledgeId);
@@ -159,10 +163,13 @@ else if (RAGEnums.DocSourceTypeEnums.UPLOAD.equals(sourceType)) { // 处理上
159163
aiDoc.setCreateTime(nowDate);
160164
aiDoc.setUpdateBy(username);
161165
aiDoc.setUpdateTime(nowDate);
162-
// TODO 需要优化批量插入
163-
aiDocMapper.insertAiDoc(aiDoc);
164166

165-
Long docId = aiDoc.getDocId();
167+
// 设置vo信息
168+
aiDocMqVO.setAiDoc(aiDoc);
169+
aiDocMqVO.setAiDocSliceList(docSliceList);
170+
aiDocMqVO.setDocumentList(docList);
171+
// 加入集合
172+
aiDocMqVOList.add(aiDocMqVO);
166173

167174
// 切片文档
168175
for (Document document : documents) {
@@ -171,12 +178,10 @@ else if (RAGEnums.DocSourceTypeEnums.UPLOAD.equals(sourceType)) { // 处理上
171178
for (Document doc : docs) {
172179
Map<String, Object> metadata = doc.getMetadata();
173180
metadata.put(RAGEnums.VectorMetadataEnums.KNOWLEDGE_ID.getCode(), knowledgeId);
174-
metadata.put(RAGEnums.VectorMetadataEnums.DOC_ID.getCode(), docId);
175181

176182
AiDocSlice aiDocSlice = new AiDocSlice();
177183
aiDocSlice.setKnowledgeId(knowledgeId);
178184
aiDocSlice.setVectorId(doc.getId());
179-
aiDocSlice.setDocId(docId);
180185
aiDocSlice.setSliceName(fileName);
181186
aiDocSlice.setContent(doc.getContent());
182187
aiDocSlice.setCreateBy(username);
@@ -192,7 +197,32 @@ else if (RAGEnums.DocSourceTypeEnums.UPLOAD.equals(sourceType)) { // 处理上
192197
}
193198
}
194199

195-
if (CollectionUtil.isNotEmpty(docSliceList)) {
200+
if (CollectionUtil.isNotEmpty(aiDocMqVOList)) {
201+
202+
List<AiDoc> aiDocList = aiDocMqVOList.stream().map(AiDocMqVO::getAiDoc).toList();
203+
// 批量插入文档
204+
aiDocMapper.insertAiDocBatch(aiDocList);
205+
206+
List<AiDocSlice> docSliceList = new ArrayList<>();
207+
List<Document> docList = new ArrayList<>();
208+
209+
for (AiDocMqVO aiDocMqVO : aiDocMqVOList) {
210+
AiDoc aiDoc = aiDocMqVO.getAiDoc();
211+
Long docId = aiDoc.getDocId();
212+
213+
List<AiDocSlice> aiDocSliceList = aiDocMqVO.getAiDocSliceList();
214+
for (AiDocSlice aiDocSlice : aiDocSliceList) {
215+
aiDocSlice.setDocId(docId);
216+
docSliceList.add(aiDocSlice);
217+
}
218+
219+
List<Document> documentList = aiDocMqVO.getDocumentList();
220+
for (Document document : documentList) {
221+
Map<String, Object> metadata = document.getMetadata();
222+
metadata.put(RAGEnums.VectorMetadataEnums.DOC_ID.getCode(), docId);
223+
docList.add(document);
224+
}
225+
}
196226

197227
// 插入切片
198228
aiDocSliceMapper.insertAiDocSliceBatch(docSliceList);

Diff for: twelvet-server/twelvet-server-ai/src/main/java/com/twelvet/server/ai/service/impl/AiDocServiceImpl.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ else if (RAGEnums.DocSourceTypeEnums.UPLOAD.equals(aiDocDTO.getSourceType())) {
117117
aiDocMqDTO.setFileList(aiDocDTO.getFileList());
118118

119119
aiDocMqDTO.setOperatorBy(username);
120-
streamBridge.send(RAGChannel.ADD_RAG_DOC, MessageBuilder.withPayload(aiDocMqDTO).build());
120+
// streamBridge.send(RAGChannel.ADD_RAG_DOC,
121+
// MessageBuilder.withPayload(aiDocMqDTO).build());
121122

122123
return Boolean.TRUE;
123124
}

Diff for: twelvet-server/twelvet-server-ai/src/main/resources/mapper/ai/AiDocMapper.xml

+9
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,15 @@
5656
</trim>
5757
</insert>
5858

59+
<!--
    Batch-inserts ai_doc rows with a single multi-row INSERT.
    The statement receives a List of AiDoc and iterates it under MyBatis' built-in
    "list" key, which is registered for every List argument regardless of whether the
    mapper method declares @Param or was compiled with parameter names.
    useGeneratedKeys / keyProperty ask MyBatis to write each generated key back into
    the docId property of the corresponding list element.
    The list must not be empty, otherwise the VALUES clause would have no rows.
-->
<insert id="insertAiDocBatch" parameterType="List" useGeneratedKeys="true" keyProperty="docId">
    INSERT INTO ai_doc ( knowledge_id, source_type, doc_name, create_by, create_time, update_by, update_time )
    values
    <foreach collection="list" item="aiDoc" separator=",">
        (#{aiDoc.knowledgeId}, #{aiDoc.sourceType}, #{aiDoc.docName}, #{aiDoc.createBy}, #{aiDoc.createTime},
        #{aiDoc.updateBy}, #{aiDoc.updateTime})
    </foreach>
</insert>
67+
5968
<update id="updateAiDoc" parameterType="AiDoc">
6069
update ai_doc
6170
<trim prefix="SET" suffixOverrides=",">

Diff for: twelvet-server/twelvet-server-dfs/src/main/java/com/twelvet/server/dfs/controller/DFSController.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public class DFSController extends TWTController {
3535
* @return JsonResult<List<SysDfs>>
3636
*/
3737
@Operation(summary = "多文件上传")
38-
@Log(service = "多文件上传", businessType = BusinessType.IMPORT)
38+
@Log(service = "多文件上传", businessType = BusinessType.OTHER)
3939
@PostMapping("/batchUpload")
4040
public JsonResult<List<SysDfs>> batchUpload(MultipartFile[] files) {
4141
// 上传并返回访问地址
@@ -51,7 +51,7 @@ public JsonResult<List<SysDfs>> batchUpload(MultipartFile[] files) {
5151
*/
5252
@Operation(summary = "单文件上传")
5353
@PostMapping("/commonUpload")
54-
@Log(service = "单文件上传", businessType = BusinessType.IMPORT)
54+
@Log(service = "单文件上传", businessType = BusinessType.OTHER)
5555
public JsonResult<String> commonUpload(MultipartFile file) {
5656
// 上传并返回访问地址
5757
SysDfs sysDfs = sysFileService.uploadFile(file);

Diff for: twelvet-server/twelvet-server-dfs/src/main/java/com/twelvet/server/dfs/service/impl/DFSServiceImpl.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public List<SysDfs> uploadFiles(MultipartFile[] files) {
6565
long size = file.getSize();
6666

6767
sysDfs.setSize(size);
68-
sysDfs.setPath("/" + key);
68+
sysDfs.setPath(key);
6969
sysDfs.setType(FileUtils.getSuffix(originalFilename));
7070
sysDfs.setFileName(FileUtils.getName(originalFilename));
7171
sysDfs.setOriginalFileName(originalFilename);

0 commit comments

Comments
 (0)