feat(ai): 优化AI模块路由,支持文档分片管理功能
- 将AI对话与知识库控制器基础路径改为/api/v1/前缀 - 修改ChatRequest中sessionId类型为String,统一会话ID处理 - 新增DocumentChunkVO用于表示文档分片实体 - 在AiKnowledgeBaseService及实现类中添加获取文档分片列表及详情接口 - 在AiKnowledgeBaseController新增两个REST API:获取文档分片列表和获取分片详情 - AiDocumentMapper新增数据库操作方法selectDocumentChunks和selectChunkById - 修改向量存储切片保存逻辑,确保分片含完整项目和文档属性元数据 - 优化文档分片持久化时更新原始
This commit is contained in:
@@ -25,7 +25,7 @@ import java.util.UUID;
|
||||
*/
|
||||
@Slf4j
|
||||
@RestController
|
||||
@RequestMapping("/ai/chat")
|
||||
@RequestMapping("/api/v1/ai/chat")
|
||||
@RequiredArgsConstructor
|
||||
@Tag(name = "AI对话", description = "AI智能问答相关接口")
|
||||
public class AiChatController {
|
||||
|
||||
@@ -3,6 +3,7 @@ package cn.yinlihupo.controller.ai;
|
||||
import cn.yinlihupo.common.core.BaseResponse;
|
||||
import cn.yinlihupo.common.util.ResultUtils;
|
||||
import cn.yinlihupo.common.util.SecurityUtils;
|
||||
import cn.yinlihupo.domain.vo.DocumentChunkVO;
|
||||
import cn.yinlihupo.domain.vo.KbDocumentVO;
|
||||
import cn.yinlihupo.service.ai.AiKnowledgeBaseService;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
@@ -20,7 +21,7 @@ import java.util.List;
|
||||
*/
|
||||
@Slf4j
|
||||
@RestController
|
||||
@RequestMapping("/ai/kb")
|
||||
@RequestMapping("/api/v1/ai/kb")
|
||||
@RequiredArgsConstructor
|
||||
@Tag(name = "AI知识库", description = "AI知识库文档管理相关接口")
|
||||
public class AiKnowledgeBaseController {
|
||||
@@ -134,4 +135,53 @@ public class AiKnowledgeBaseController {
|
||||
return ResultUtils.error("重新索引失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文档分片列表
|
||||
*
|
||||
* @param docId 文档UUID
|
||||
* @return 分片列表
|
||||
*/
|
||||
@GetMapping("/document/{docId}/chunks")
|
||||
@Operation(summary = "获取文档分片列表", description = "获取指定文档的所有分片信息")
|
||||
public BaseResponse<List<DocumentChunkVO>> getDocumentChunks(@PathVariable String docId) {
|
||||
Long userId = SecurityUtils.getCurrentUserId();
|
||||
if (userId == null) {
|
||||
return ResultUtils.error("用户未登录");
|
||||
}
|
||||
|
||||
try {
|
||||
List<DocumentChunkVO> chunks = knowledgeBaseService.getDocumentChunks(docId);
|
||||
return ResultUtils.success("查询成功", chunks);
|
||||
} catch (Exception e) {
|
||||
log.error("获取文档分片失败: {}", e.getMessage(), e);
|
||||
return ResultUtils.error("获取文档分片失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取分片详情
|
||||
*
|
||||
* @param chunkId 分片ID
|
||||
* @return 分片详情
|
||||
*/
|
||||
@GetMapping("/chunk/{chunkId}")
|
||||
@Operation(summary = "获取分片详情", description = "获取指定分片的详细信息")
|
||||
public BaseResponse<DocumentChunkVO> getChunkDetail(@PathVariable String chunkId) {
|
||||
Long userId = SecurityUtils.getCurrentUserId();
|
||||
if (userId == null) {
|
||||
return ResultUtils.error("用户未登录");
|
||||
}
|
||||
|
||||
try {
|
||||
DocumentChunkVO chunk = knowledgeBaseService.getChunkDetail(chunkId);
|
||||
if (chunk == null) {
|
||||
return ResultUtils.error("分片不存在");
|
||||
}
|
||||
return ResultUtils.success("查询成功", chunk);
|
||||
} catch (Exception e) {
|
||||
log.error("获取分片详情失败: {}", e.getMessage(), e);
|
||||
return ResultUtils.error("获取分片详情失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,6 @@ package cn.yinlihupo.domain.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* AI对话请求DTO
|
||||
*/
|
||||
@@ -13,7 +11,7 @@ public class ChatRequest {
|
||||
/**
|
||||
* 会话ID(为空则新建会话)
|
||||
*/
|
||||
private UUID sessionId;
|
||||
private String sessionId;
|
||||
|
||||
/**
|
||||
* 项目ID(必填)
|
||||
|
||||
55
src/main/java/cn/yinlihupo/domain/vo/DocumentChunkVO.java
Normal file
55
src/main/java/cn/yinlihupo/domain/vo/DocumentChunkVO.java
Normal file
@@ -0,0 +1,55 @@
|
||||
package cn.yinlihupo.domain.vo;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 文档分片VO
|
||||
*/
|
||||
@Data
|
||||
public class DocumentChunkVO {
|
||||
|
||||
/**
|
||||
* 分片ID
|
||||
*/
|
||||
private String id;
|
||||
|
||||
/**
|
||||
* 原始文档ID
|
||||
*/
|
||||
private String docId;
|
||||
|
||||
/**
|
||||
* 分片内容
|
||||
*/
|
||||
private String content;
|
||||
|
||||
/**
|
||||
* 分片序号
|
||||
*/
|
||||
private Integer chunkIndex;
|
||||
|
||||
/**
|
||||
* 总分片数
|
||||
*/
|
||||
private Integer chunkTotal;
|
||||
|
||||
/**
|
||||
* 文档标题
|
||||
*/
|
||||
private String title;
|
||||
|
||||
/**
|
||||
* 文档类型
|
||||
*/
|
||||
private String docType;
|
||||
|
||||
/**
|
||||
* 来源类型
|
||||
*/
|
||||
private String sourceType;
|
||||
|
||||
/**
|
||||
* 状态
|
||||
*/
|
||||
private String status;
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package cn.yinlihupo.mapper;
|
||||
|
||||
import cn.yinlihupo.domain.entity.AiDocument;
|
||||
import cn.yinlihupo.domain.vo.DocumentChunkVO;
|
||||
import cn.yinlihupo.domain.vo.KbDocumentVO;
|
||||
import cn.yinlihupo.domain.vo.ReferencedDocVO;
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
@@ -88,4 +89,21 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
* @return 影响行数
|
||||
*/
|
||||
int incrementQueryCount(@Param("id") String id);
|
||||
|
||||
/**
|
||||
* 查询文档分片列表
|
||||
* 通过 metadata 中的 doc_id 字段查询关联的分片
|
||||
*
|
||||
* @param docId 文档ID
|
||||
* @return 分片列表
|
||||
*/
|
||||
List<DocumentChunkVO> selectDocumentChunks(@Param("docId") String docId);
|
||||
|
||||
/**
|
||||
* 查询文档分片详情
|
||||
*
|
||||
* @param chunkId 分片ID
|
||||
* @return 分片详情
|
||||
*/
|
||||
DocumentChunkVO selectChunkById(@Param("chunkId") String chunkId);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package cn.yinlihupo.service.ai;
|
||||
|
||||
import cn.yinlihupo.domain.vo.DocumentChunkVO;
|
||||
import cn.yinlihupo.domain.vo.KbDocumentVO;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
@@ -57,4 +58,20 @@ public interface AiKnowledgeBaseService {
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
void processDocumentAsync(String docId);
|
||||
|
||||
/**
|
||||
* 获取文档分片列表
|
||||
*
|
||||
* @param docId 文档ID
|
||||
* @return 分片列表
|
||||
*/
|
||||
List<DocumentChunkVO> getDocumentChunks(String docId);
|
||||
|
||||
/**
|
||||
* 获取分片详情
|
||||
*
|
||||
* @param chunkId 分片ID
|
||||
* @return 分片详情
|
||||
*/
|
||||
DocumentChunkVO getChunkDetail(String chunkId);
|
||||
}
|
||||
|
||||
@@ -67,24 +67,24 @@ public class AiChatServiceImpl implements AiChatService {
|
||||
@Override
|
||||
public void streamChat(ChatRequest request, Long userId, SseEmitter emitter) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
UUID sessionId = request.getSessionId();
|
||||
String sessionId = request.getSessionId();
|
||||
boolean isNewSession = (sessionId == null);
|
||||
|
||||
try {
|
||||
// 1. 获取或创建会话
|
||||
if (isNewSession) {
|
||||
sessionId = UUID.randomUUID();
|
||||
sessionId = UUID.randomUUID().toString();
|
||||
String title = generateSessionTitle(request.getMessage());
|
||||
createSession(userId, request.getProjectId(), request.getTimelineNodeId(), request.getMessage(), title);
|
||||
} else {
|
||||
// 验证会话权限
|
||||
if (!hasSessionAccess(sessionId, userId)) {
|
||||
if (!hasSessionAccess(UUID.fromString(sessionId), userId)) {
|
||||
sendError(emitter, "无权访问该会话");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
final UUID finalSessionId = sessionId;
|
||||
final String finalSessionId = sessionId;
|
||||
|
||||
// 发送开始消息
|
||||
sendEvent(emitter, "start", Map.of(
|
||||
@@ -107,7 +107,7 @@ public class AiChatServiceImpl implements AiChatService {
|
||||
|
||||
// 4. 构建Prompt
|
||||
String systemPrompt = buildSystemPrompt(request.getProjectId(), retrievedDocs);
|
||||
List<Message> messages = buildMessages(finalSessionId, request.getContextWindow(),
|
||||
List<Message> messages = buildMessages(UUID.fromString(finalSessionId) , request.getContextWindow(),
|
||||
systemPrompt, request.getMessage());
|
||||
|
||||
// 5. 流式调用LLM
|
||||
@@ -331,15 +331,15 @@ public class AiChatServiceImpl implements AiChatService {
|
||||
/**
|
||||
* 保存消息
|
||||
*/
|
||||
private Long saveMessage(UUID sessionId, Long userId, Long projectId,
|
||||
private Long saveMessage(String sessionId, Long userId, Long projectId,
|
||||
Long timelineNodeId, String role, String content,
|
||||
String referencedDocIds) {
|
||||
// 获取当前最大序号
|
||||
Integer maxIndex = chatHistoryMapper.selectMaxMessageIndex(sessionId);
|
||||
Integer maxIndex = chatHistoryMapper.selectMaxMessageIndex(UUID.fromString(sessionId));
|
||||
int nextIndex = (maxIndex != null ? maxIndex : 0) + 1;
|
||||
|
||||
AiChatMessage message = new AiChatMessage();
|
||||
message.setSessionId(sessionId);
|
||||
message.setSessionId(UUID.fromString(sessionId));
|
||||
message.setUserId(userId);
|
||||
message.setProjectId(projectId);
|
||||
message.setTimelineNodeId(timelineNodeId);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package cn.yinlihupo.service.ai.impl;
|
||||
|
||||
import cn.yinlihupo.domain.entity.AiDocument;
|
||||
import cn.yinlihupo.domain.vo.DocumentChunkVO;
|
||||
import cn.yinlihupo.domain.vo.KbDocumentVO;
|
||||
import cn.yinlihupo.mapper.AiDocumentMapper;
|
||||
import cn.yinlihupo.service.ai.AiKnowledgeBaseService;
|
||||
@@ -209,4 +210,14 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
vo.setCreateTime(doc.getCreateTime());
|
||||
return vo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DocumentChunkVO> getDocumentChunks(String docId) {
|
||||
return documentMapper.selectDocumentChunks(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocumentChunkVO getChunkDetail(String chunkId) {
|
||||
return documentMapper.selectChunkById(chunkId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
@@ -165,44 +166,51 @@ public class DocumentProcessor {
|
||||
|
||||
/**
|
||||
* 存储切片到向量库
|
||||
* 每个分片都是独立记录,包含完整的项目属性用于检索
|
||||
*
|
||||
* @param parentDoc 父文档
|
||||
* @param chunks 切片列表
|
||||
* @param doc 文档实体(包含项目属性)
|
||||
* @param chunks 切片列表
|
||||
*/
|
||||
private void storeChunks(AiDocument parentDoc, List<String> chunks) {
|
||||
String parentId = parentDoc.getId();
|
||||
private void storeChunks(AiDocument doc, List<String> chunks) {
|
||||
String docId = doc.getId();
|
||||
|
||||
for (int i = 0; i < chunks.size(); i++) {
|
||||
String chunkContent = chunks.get(i);
|
||||
// 使用UUID生成唯一的chunk ID,确保格式正确
|
||||
String chunkId = UUID.randomUUID().toString();
|
||||
|
||||
// 创建向量文档
|
||||
Document vectorDoc = new Document(
|
||||
chunkId,
|
||||
chunkContent,
|
||||
Map.of(
|
||||
"project_id", parentDoc.getProjectId() != null ? parentDoc.getProjectId().toString() : "",
|
||||
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId().toString() : "",
|
||||
"chunk_index", i,
|
||||
"chunk_total", chunks.size(),
|
||||
"chunk_parent_id", parentId,
|
||||
"title", parentDoc.getTitle() != null ? parentDoc.getTitle() : "",
|
||||
"source_type", parentDoc.getSourceType() != null ? parentDoc.getSourceType() : "",
|
||||
"status", "active"
|
||||
)
|
||||
);
|
||||
// 创建向量文档,每个分片都包含完整的项目属性
|
||||
Map<String, Object> metadata = new HashMap<>();
|
||||
// 项目关联属性(用于检索过滤)
|
||||
metadata.put("project_id", doc.getProjectId() != null ? doc.getProjectId().toString() : "");
|
||||
metadata.put("timeline_node_id", doc.getTimelineNodeId() != null ? doc.getTimelineNodeId().toString() : "");
|
||||
metadata.put("kb_id", doc.getKbId() != null ? doc.getKbId().toString() : "");
|
||||
// 文档来源信息
|
||||
metadata.put("source_type", doc.getSourceType() != null ? doc.getSourceType() : "");
|
||||
metadata.put("source_id", doc.getSourceId() != null ? doc.getSourceId().toString() : "");
|
||||
// 文档信息
|
||||
metadata.put("doc_id", docId); // 原始文档ID,用于关联查询
|
||||
metadata.put("title", doc.getTitle() != null ? doc.getTitle() : "");
|
||||
metadata.put("doc_type", doc.getDocType() != null ? doc.getDocType() : "");
|
||||
metadata.put("file_type", doc.getFileType() != null ? doc.getFileType() : "");
|
||||
// 分片信息
|
||||
metadata.put("chunk_index", i);
|
||||
metadata.put("chunk_total", chunks.size());
|
||||
// 状态
|
||||
metadata.put("status", "active");
|
||||
|
||||
Document vectorDoc = new Document(chunkId, chunkContent, metadata);
|
||||
|
||||
// 存储到向量库
|
||||
vectorStore.add(List.of(vectorDoc));
|
||||
|
||||
// 如果是第一个切片,更新父文档内容
|
||||
if (i == 0) {
|
||||
parentDoc.setContent(chunkContent);
|
||||
documentMapper.updateById(parentDoc);
|
||||
}
|
||||
log.debug("存储切片: {}/{}, docId: {}, chunkId: {}", i + 1, chunks.size(), docId, chunkId);
|
||||
}
|
||||
|
||||
log.debug("存储切片: {}/{}, parentId: {}, chunkId: {}", i + 1, chunks.size(), parentId, chunkId);
|
||||
// 更新文档内容为第一个分片(用于预览)
|
||||
if (!chunks.isEmpty()) {
|
||||
doc.setContent(chunks.get(0));
|
||||
documentMapper.updateById(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
||||
<mapper namespace="cn.yinlihupo.mapper.AiDocumentMapper">
|
||||
|
||||
<!-- 获取项目文档列表 -->
|
||||
<!-- 获取项目文档列表(只查询原始文档,不包含分片) -->
|
||||
<select id="selectProjectDocuments" resultType="cn.yinlihupo.domain.vo.KbDocumentVO">
|
||||
SELECT
|
||||
vs.id,
|
||||
@@ -20,7 +20,7 @@
|
||||
LEFT JOIN sys_user su ON vs.create_by = su.id
|
||||
WHERE vs.project_id = #{projectId}
|
||||
AND vs.deleted = 0
|
||||
AND vs.chunk_parent_id IS NULL
|
||||
AND (vs.metadata->>'doc_id') IS NULL
|
||||
ORDER BY vs.create_time DESC
|
||||
</select>
|
||||
|
||||
@@ -58,11 +58,11 @@
|
||||
AND deleted = 0
|
||||
</select>
|
||||
|
||||
<!-- 获取父文档的分块数量 -->
|
||||
<!-- 获取文档的分片数量 -->
|
||||
<select id="selectChunkCount" resultType="java.lang.Integer">
|
||||
SELECT COUNT(*)
|
||||
FROM vector_store
|
||||
WHERE chunk_parent_id = #{docId}
|
||||
WHERE metadata->>'doc_id' = #{docId}
|
||||
AND deleted = 0
|
||||
</select>
|
||||
|
||||
@@ -98,4 +98,40 @@
|
||||
WHERE id = #{id}
|
||||
</update>
|
||||
|
||||
<!-- 查询文档分片列表 -->
|
||||
<select id="selectDocumentChunks" resultType="cn.yinlihupo.domain.vo.DocumentChunkVO">
|
||||
SELECT
|
||||
id,
|
||||
metadata->>'doc_id' as docId,
|
||||
content,
|
||||
CAST(metadata->>'chunk_index' AS INTEGER) as chunkIndex,
|
||||
CAST(metadata->>'chunk_total' AS INTEGER) as chunkTotal,
|
||||
metadata->>'title' as title,
|
||||
metadata->>'doc_type' as docType,
|
||||
metadata->>'source_type' as sourceType,
|
||||
metadata->>'status' as status
|
||||
FROM vector_store
|
||||
WHERE metadata->>'doc_id' = #{docId}
|
||||
AND deleted = 0
|
||||
ORDER BY CAST(metadata->>'chunk_index' AS INTEGER)
|
||||
</select>
|
||||
|
||||
<!-- 查询分片详情 -->
|
||||
<select id="selectChunkById" resultType="cn.yinlihupo.domain.vo.DocumentChunkVO">
|
||||
SELECT
|
||||
id,
|
||||
metadata->>'doc_id' as docId,
|
||||
content,
|
||||
CAST(metadata->>'chunk_index' AS INTEGER) as chunkIndex,
|
||||
CAST(metadata->>'chunk_total' AS INTEGER) as chunkTotal,
|
||||
metadata->>'title' as title,
|
||||
metadata->>'doc_type' as docType,
|
||||
metadata->>'source_type' as sourceType,
|
||||
metadata->>'status' as status
|
||||
FROM vector_store
|
||||
WHERE id = #{chunkId}
|
||||
AND deleted = 0
|
||||
LIMIT 1
|
||||
</select>
|
||||
|
||||
</mapper>
|
||||
|
||||
Reference in New Issue
Block a user