refactor(ai): 合并ai_document表到vector_store表,切换文档ID类型为字符串
- 删除旧的ai_document表及相关索引,新增vector_store表兼容Spring AI PgVectorStore结构
- 调整实体类AiDocument映射到vector_store表,使用字符串ID代替Long类型
- 修改Mapper接口及XML中所有ID相关SQL使用字符串类型,并替换表名为vector_store
- 修改服务接口与实现类,文档ID参数类型统一为字符串
- 处理文档分块时改用UUID生成chunk ID,确保唯一且格式正确
- 禁用Spring AI PgVectorStore的自动schema初始化,使用手动创建的表结构
- 更新配置文件OpenAI模型API key及基础URL配置,支持多模型与聊天功能
- 优化日志输出,增加分块文档ID和父文档ID显示,方便调试追踪
This commit is contained in:
@@ -49,12 +49,12 @@ public interface AiKnowledgeBaseService {
|
||||
*
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
void processDocument(Long docId);
|
||||
void processDocument(String docId);
|
||||
|
||||
/**
|
||||
* 异步处理文档
|
||||
*
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
void processDocumentAsync(Long docId);
|
||||
void processDocumentAsync(String docId);
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
|
||||
// 4. 保存文档元数据
|
||||
AiDocument doc = new AiDocument();
|
||||
doc.setDocId(docId);
|
||||
doc.setId(docId); // 设置标准UUID格式的ID
|
||||
doc.setProjectId(projectId);
|
||||
doc.setSourceType("upload");
|
||||
doc.setTitle(originalFilename);
|
||||
@@ -75,7 +75,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
// 5. 异步处理文档(解析、切片、向量化)
|
||||
documentProcessor.processDocumentAsync(doc.getId());
|
||||
|
||||
log.info("文件上传成功: {}, docId: {}", originalFilename, docId);
|
||||
log.info("文件上传成功: {}, docId: {}", originalFilename, doc.getId());
|
||||
|
||||
// 6. 返回VO
|
||||
return convertToVO(doc);
|
||||
@@ -133,13 +133,13 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processDocument(Long docId) {
|
||||
public void processDocument(String docId) {
|
||||
documentProcessor.processDocument(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
@Async
|
||||
public void processDocumentAsync(Long docId) {
|
||||
public void processDocumentAsync(String docId) {
|
||||
documentProcessor.processDocument(docId);
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
private KbDocumentVO convertToVO(AiDocument doc) {
|
||||
KbDocumentVO vo = new KbDocumentVO();
|
||||
vo.setId(doc.getId());
|
||||
vo.setDocId(doc.getDocId());
|
||||
vo.setDocId(doc.getId());
|
||||
vo.setTitle(doc.getTitle());
|
||||
vo.setDocType(doc.getDocType());
|
||||
vo.setFileType(doc.getFileType());
|
||||
|
||||
@@ -16,6 +16,7 @@ import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
@@ -41,7 +42,7 @@ public class DocumentProcessor {
|
||||
*
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
public void processDocument(Long docId) {
|
||||
public void processDocument(String docId) {
|
||||
AiDocument doc = documentMapper.selectById(docId);
|
||||
if (doc == null) {
|
||||
log.error("文档不存在: {}", docId);
|
||||
@@ -91,7 +92,7 @@ public class DocumentProcessor {
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
@Async("documentTaskExecutor")
|
||||
public void processDocumentAsync(Long docId) {
|
||||
public void processDocumentAsync(String docId) {
|
||||
processDocument(docId);
|
||||
}
|
||||
|
||||
@@ -169,23 +170,25 @@ public class DocumentProcessor {
|
||||
* @param chunks 切片列表
|
||||
*/
|
||||
private void storeChunks(AiDocument parentDoc, List<String> chunks) {
|
||||
String docId = parentDoc.getDocId();
|
||||
Long parentId = parentDoc.getId();
|
||||
String parentId = parentDoc.getId();
|
||||
|
||||
for (int i = 0; i < chunks.size(); i++) {
|
||||
String chunkContent = chunks.get(i);
|
||||
// 使用UUID生成唯一的chunk ID,确保格式正确
|
||||
String chunkId = UUID.randomUUID().toString();
|
||||
|
||||
// 创建向量文档
|
||||
Document vectorDoc = new Document(
|
||||
chunkId,
|
||||
chunkContent,
|
||||
Map.of(
|
||||
"doc_id", docId.toString(),
|
||||
"project_id", parentDoc.getProjectId(),
|
||||
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId() : "",
|
||||
"project_id", parentDoc.getProjectId() != null ? parentDoc.getProjectId().toString() : "",
|
||||
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId().toString() : "",
|
||||
"chunk_index", i,
|
||||
"chunk_total", chunks.size(),
|
||||
"title", parentDoc.getTitle(),
|
||||
"source_type", parentDoc.getSourceType(),
|
||||
"chunk_parent_id", parentId,
|
||||
"title", parentDoc.getTitle() != null ? parentDoc.getTitle() : "",
|
||||
"source_type", parentDoc.getSourceType() != null ? parentDoc.getSourceType() : "",
|
||||
"status", "active"
|
||||
)
|
||||
);
|
||||
@@ -199,7 +202,7 @@ public class DocumentProcessor {
|
||||
documentMapper.updateById(parentDoc);
|
||||
}
|
||||
|
||||
log.debug("存储切片: {}/{}, docId: {}", i + 1, chunks.size(), docId);
|
||||
log.debug("存储切片: {}/{}, parentId: {}, chunkId: {}", i + 1, chunks.size(), parentId, chunkId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user