feat(ai): 优化AI模块路由,支持文档分片管理功能

- 将AI对话与知识库控制器基础路径改为/api/v1/前缀
- 修改ChatRequest中sessionId类型为String,统一会话ID处理
- 新增DocumentChunkVO用于表示文档分片实体
- 在AiKnowledgeBaseService及实现类中添加获取文档分片列表及详情接口
- 在AiKnowledgeBaseController新增两个REST API:获取文档分片列表和获取分片详情
- AiDocumentMapper新增数据库操作方法selectDocumentChunks和selectChunkById
- 修改向量存储切片保存逻辑,确保分片含完整项目和文档属性元数据
- 优化文档分片持久化逻辑:持久化后将原始文档内容更新为第一个分片,用于预览
This commit is contained in:
2026-03-30 18:12:37 +08:00
parent 4ca9c63302
commit 4399550418
10 changed files with 235 additions and 42 deletions

View File

@@ -1,5 +1,6 @@
package cn.yinlihupo.service.ai;
import cn.yinlihupo.domain.vo.DocumentChunkVO;
import cn.yinlihupo.domain.vo.KbDocumentVO;
import org.springframework.web.multipart.MultipartFile;
@@ -57,4 +58,20 @@ public interface AiKnowledgeBaseService {
* @param docId 文档ID
*/
void processDocumentAsync(String docId);
/**
 * Lists all stored chunks of a document.
 *
 * @param docId document ID
 * @return list of chunks belonging to the document
 */
List<DocumentChunkVO> getDocumentChunks(String docId);
/**
 * Fetches the detail of a single chunk.
 *
 * @param chunkId chunk ID
 * @return chunk detail; presumably null when no such chunk exists — confirm mapper behavior
 */
DocumentChunkVO getChunkDetail(String chunkId);
}

View File

@@ -67,24 +67,24 @@ public class AiChatServiceImpl implements AiChatService {
@Override
public void streamChat(ChatRequest request, Long userId, SseEmitter emitter) {
long startTime = System.currentTimeMillis();
UUID sessionId = request.getSessionId();
String sessionId = request.getSessionId();
boolean isNewSession = (sessionId == null);
try {
// 1. 获取或创建会话
if (isNewSession) {
sessionId = UUID.randomUUID();
sessionId = UUID.randomUUID().toString();
String title = generateSessionTitle(request.getMessage());
createSession(userId, request.getProjectId(), request.getTimelineNodeId(), request.getMessage(), title);
} else {
// 验证会话权限
if (!hasSessionAccess(sessionId, userId)) {
if (!hasSessionAccess(UUID.fromString(sessionId), userId)) {
sendError(emitter, "无权访问该会话");
return;
}
}
final UUID finalSessionId = sessionId;
final String finalSessionId = sessionId;
// 发送开始消息
sendEvent(emitter, "start", Map.of(
@@ -107,7 +107,7 @@ public class AiChatServiceImpl implements AiChatService {
// 4. 构建Prompt
String systemPrompt = buildSystemPrompt(request.getProjectId(), retrievedDocs);
List<Message> messages = buildMessages(finalSessionId, request.getContextWindow(),
List<Message> messages = buildMessages(UUID.fromString(finalSessionId) , request.getContextWindow(),
systemPrompt, request.getMessage());
// 5. 流式调用LLM
@@ -331,15 +331,15 @@ public class AiChatServiceImpl implements AiChatService {
/**
* 保存消息
*/
private Long saveMessage(UUID sessionId, Long userId, Long projectId,
private Long saveMessage(String sessionId, Long userId, Long projectId,
Long timelineNodeId, String role, String content,
String referencedDocIds) {
// 获取当前最大序号
Integer maxIndex = chatHistoryMapper.selectMaxMessageIndex(sessionId);
Integer maxIndex = chatHistoryMapper.selectMaxMessageIndex(UUID.fromString(sessionId));
int nextIndex = (maxIndex != null ? maxIndex : 0) + 1;
AiChatMessage message = new AiChatMessage();
message.setSessionId(sessionId);
message.setSessionId(UUID.fromString(sessionId));
message.setUserId(userId);
message.setProjectId(projectId);
message.setTimelineNodeId(timelineNodeId);

View File

@@ -1,6 +1,7 @@
package cn.yinlihupo.service.ai.impl;
import cn.yinlihupo.domain.entity.AiDocument;
import cn.yinlihupo.domain.vo.DocumentChunkVO;
import cn.yinlihupo.domain.vo.KbDocumentVO;
import cn.yinlihupo.mapper.AiDocumentMapper;
import cn.yinlihupo.service.ai.AiKnowledgeBaseService;
@@ -209,4 +210,14 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
vo.setCreateTime(doc.getCreateTime());
return vo;
}
@Override
public List<DocumentChunkVO> getDocumentChunks(String docId) {
    // Delegate straight to the mapper, which resolves all persisted chunks of the document.
    List<DocumentChunkVO> chunks = documentMapper.selectDocumentChunks(docId);
    return chunks;
}
@Override
public DocumentChunkVO getChunkDetail(String chunkId) {
    // Single-row lookup by chunk ID, delegated to the mapper.
    DocumentChunkVO chunk = documentMapper.selectChunkById(chunkId);
    return chunk;
}
}

View File

@@ -14,6 +14,7 @@ import org.springframework.stereotype.Component;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
@@ -165,44 +166,51 @@ public class DocumentProcessor {
// NOTE(review): this span is a rendered diff — old (parentDoc / Map.of metadata) and new
// (doc / HashMap metadata) lines of storeChunks are interleaved below. The version using
// `doc`, `docId` and the mutable `metadata` map is the current one; the duplicated
// signature and the Map.of(...) construction are leftover pre-change lines.
/**
 * Stores document chunks into the vector store.
 * Each chunk is an independent record that carries the full set of project
 * attributes so it can be filtered during retrieval on its own.
 *
 * @param parentDoc parent document
 * @param chunks chunk contents, in order
 * @param doc document entity (carries the project attributes)
 * @param chunks chunk contents, in order
 */
private void storeChunks(AiDocument parentDoc, List<String> chunks) {
String parentId = parentDoc.getId();
private void storeChunks(AiDocument doc, List<String> chunks) {
String docId = doc.getId();
for (int i = 0; i < chunks.size(); i++) {
String chunkContent = chunks.get(i);
// Use a random UUID as the chunk ID to guarantee a unique, well-formed key
String chunkId = UUID.randomUUID().toString();
// Build the vector document (pre-change version)
Document vectorDoc = new Document(
chunkId,
chunkContent,
Map.of(
"project_id", parentDoc.getProjectId() != null ? parentDoc.getProjectId().toString() : "",
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId().toString() : "",
"chunk_index", i,
"chunk_total", chunks.size(),
"chunk_parent_id", parentId,
"title", parentDoc.getTitle() != null ? parentDoc.getTitle() : "",
"source_type", parentDoc.getSourceType() != null ? parentDoc.getSourceType() : "",
"status", "active"
)
);
// Build the vector document; every chunk carries the full project attributes
Map<String, Object> metadata = new HashMap<>();
// Project-association attributes (used for retrieval filtering)
metadata.put("project_id", doc.getProjectId() != null ? doc.getProjectId().toString() : "");
metadata.put("timeline_node_id", doc.getTimelineNodeId() != null ? doc.getTimelineNodeId().toString() : "");
metadata.put("kb_id", doc.getKbId() != null ? doc.getKbId().toString() : "");
// Document source information
metadata.put("source_type", doc.getSourceType() != null ? doc.getSourceType() : "");
metadata.put("source_id", doc.getSourceId() != null ? doc.getSourceId().toString() : "");
// Document information
metadata.put("doc_id", docId); // original document ID, used for correlated lookups
metadata.put("title", doc.getTitle() != null ? doc.getTitle() : "");
metadata.put("doc_type", doc.getDocType() != null ? doc.getDocType() : "");
metadata.put("file_type", doc.getFileType() != null ? doc.getFileType() : "");
// Chunk position information
metadata.put("chunk_index", i);
metadata.put("chunk_total", chunks.size());
// Status flag
metadata.put("status", "active");
Document vectorDoc = new Document(chunkId, chunkContent, metadata);
// Persist this chunk into the vector store
vectorStore.add(List.of(vectorDoc));
// If this is the first chunk, update the parent document content (pre-change version)
if (i == 0) {
parentDoc.setContent(chunkContent);
documentMapper.updateById(parentDoc);
}
log.debug("存储切片: {}/{}, docId: {}, chunkId: {}", i + 1, chunks.size(), docId, chunkId);
}
log.debug("存储切片: {}/{}, parentId: {}, chunkId: {}", i + 1, chunks.size(), parentId, chunkId);
// Update the document content to the first chunk (used for preview)
if (!chunks.isEmpty()) {
doc.setContent(chunks.get(0));
documentMapper.updateById(doc);
}
}