refactor(ai): 合并ai_document表到vector_store表,切换文档ID类型为字符串

- 删除旧的ai_document表及相关索引,新增vector_store表兼容Spring AI PgVectorStore结构
- 调整实体类AiDocument映射到vector_store表,使用字符串ID代替Long类型
- 修改Mapper接口及XML中所有ID相关SQL使用字符串类型,并替换表名为vector_store
- 修改服务接口与实现类,文档ID参数类型统一为字符串
- 处理文档分块时改用UUID生成chunk ID,确保唯一且格式正确
- 禁用Spring AI PgVectorStore的自动schema初始化,使用手动创建的表结构
- 更新配置文件OpenAI模型API key及基础URL配置,支持多模型与聊天功能
- 优化日志输出,增加分块文档ID和父文档ID显示,方便调试追踪
This commit is contained in:
2026-03-30 17:43:29 +08:00
parent 9f972f5e30
commit 37da5da044
11 changed files with 202 additions and 188 deletions

View File

@@ -41,7 +41,7 @@ public class SpringAiConfig {
return PgVectorStore.builder(jdbcTemplate, embeddingModel)
.dimensions(1536) // 向量维度,与配置一致
.distanceType(PgVectorStore.PgDistanceType.COSINE_DISTANCE)
.initializeSchema(true) // 自动初始化schema
.initializeSchema(false) // 禁用自动初始化使用SQL文件中已创建的表
.build();
}
}

View File

@@ -9,20 +9,20 @@ import java.time.LocalDate;
import java.time.LocalDateTime;
/**
* AI文档向量实体
* 对应 ai_document
* 向量存储实体
* 对应 vector_store
* 兼容 Spring AI PgVectorStore 默认结构
*/
@Data
@TableName("ai_document")
@TableName("vector_store")
public class AiDocument {
@TableId(type = IdType.AUTO)
private Long id;
/**
* 文档唯一标识(UUID)
* 文档ID字符串类型兼容PgVectorStore
* 使用标准UUID格式带连字符
*/
private String docId;
@TableId(type = IdType.INPUT)
private String id;
/**
* 关联项目ID
@@ -124,7 +124,7 @@ public class AiDocument {
/**
* 父文档ID(分块时使用)
*/
private Long chunkParentId;
private String chunkParentId;
/**
* 标签数组(JSON)

View File

@@ -13,10 +13,10 @@ public class KbDocumentVO {
/**
* 文档ID
*/
private Long id;
private String id;
/**
* 文档UUID
* 文档UUID与id相同
*/
private String docId;

View File

@@ -11,10 +11,10 @@ public class ReferencedDocVO {
/**
* 文档ID
*/
private Long id;
private String id;
/**
* 文档UUID
* 文档UUID与id相同
*/
private String docId;

View File

@@ -10,7 +10,7 @@ import org.apache.ibatis.annotations.Param;
import java.util.List;
/**
* AI文档向量Mapper
* 向量存储Mapper
*/
@Mapper
public interface AiDocumentMapper extends BaseMapper<AiDocument> {
@@ -24,17 +24,17 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
List<KbDocumentVO> selectProjectDocuments(@Param("projectId") Long projectId);
/**
* 根据docId查询文档
* 根据id查询文档
*
* @param docId 文档UUID
* @param docId 文档ID
* @return 文档实体
*/
AiDocument selectByDocId(@Param("docId") String docId);
/**
* 根据docId删除文档
* 根据id删除文档
*
* @param docId 文档UUID
* @param docId 文档ID
* @return 影响行数
*/
int deleteByDocId(@Param("docId") String docId);
@@ -45,7 +45,7 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
* @param docIds 文档ID列表
* @return 文档信息列表
*/
List<ReferencedDocVO> selectReferencedDocs(@Param("docIds") List<Long> docIds);
List<ReferencedDocVO> selectReferencedDocs(@Param("docIds") List<String> docIds);
/**
* 获取父文档的分块数量
@@ -53,12 +53,12 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
* @param docId 父文档ID
* @return 分块数量
*/
Integer selectChunkCount(@Param("docId") Long docId);
Integer selectChunkCount(@Param("docId") String docId);
/**
* 更新文档状态
*
* @param docId 文档UUID
* @param docId 文档ID
* @param status 状态
* @return 影响行数
*/
@@ -67,7 +67,7 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
/**
* 更新文档错误信息
*
* @param docId 文档UUID
* @param docId 文档ID
* @param errorMessage 错误信息
* @return 影响行数
*/
@@ -79,7 +79,7 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
* @param id 文档ID
* @return 影响行数
*/
int incrementViewCount(@Param("id") Long id);
int incrementViewCount(@Param("id") String id);
/**
* 增加文档查询次数
@@ -87,5 +87,5 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
* @param id 文档ID
* @return 影响行数
*/
int incrementQueryCount(@Param("id") Long id);
int incrementQueryCount(@Param("id") String id);
}

View File

@@ -49,12 +49,12 @@ public interface AiKnowledgeBaseService {
*
* @param docId 文档ID
*/
void processDocument(Long docId);
void processDocument(String docId);
/**
* 异步处理文档
*
* @param docId 文档ID
*/
void processDocumentAsync(Long docId);
void processDocumentAsync(String docId);
}

View File

@@ -55,7 +55,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
// 4. 保存文档元数据
AiDocument doc = new AiDocument();
doc.setDocId(docId);
doc.setId(docId); // 设置标准UUID格式的ID
doc.setProjectId(projectId);
doc.setSourceType("upload");
doc.setTitle(originalFilename);
@@ -75,7 +75,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
// 5. 异步处理文档(解析、切片、向量化)
documentProcessor.processDocumentAsync(doc.getId());
log.info("文件上传成功: {}, docId: {}", originalFilename, docId);
log.info("文件上传成功: {}, docId: {}", originalFilename, doc.getId());
// 6. 返回VO
return convertToVO(doc);
@@ -133,13 +133,13 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
}
@Override
public void processDocument(Long docId) {
public void processDocument(String docId) {
documentProcessor.processDocument(docId);
}
@Override
@Async
public void processDocumentAsync(Long docId) {
public void processDocumentAsync(String docId) {
documentProcessor.processDocument(docId);
}
@@ -197,7 +197,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
private KbDocumentVO convertToVO(AiDocument doc) {
KbDocumentVO vo = new KbDocumentVO();
vo.setId(doc.getId());
vo.setDocId(doc.getDocId());
vo.setDocId(doc.getId());
vo.setTitle(doc.getTitle());
vo.setDocType(doc.getDocType());
vo.setFileType(doc.getFileType());

View File

@@ -16,6 +16,7 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;
/**
@@ -41,7 +42,7 @@ public class DocumentProcessor {
*
* @param docId 文档ID
*/
public void processDocument(Long docId) {
public void processDocument(String docId) {
AiDocument doc = documentMapper.selectById(docId);
if (doc == null) {
log.error("文档不存在: {}", docId);
@@ -91,7 +92,7 @@ public class DocumentProcessor {
* @param docId 文档ID
*/
@Async("documentTaskExecutor")
public void processDocumentAsync(Long docId) {
public void processDocumentAsync(String docId) {
processDocument(docId);
}
@@ -169,23 +170,25 @@ public class DocumentProcessor {
* @param chunks 切片列表
*/
private void storeChunks(AiDocument parentDoc, List<String> chunks) {
String docId = parentDoc.getDocId();
Long parentId = parentDoc.getId();
String parentId = parentDoc.getId();
for (int i = 0; i < chunks.size(); i++) {
String chunkContent = chunks.get(i);
// 使用UUID生成唯一的chunk ID确保格式正确
String chunkId = UUID.randomUUID().toString();
// 创建向量文档
Document vectorDoc = new Document(
chunkId,
chunkContent,
Map.of(
"doc_id", docId.toString(),
"project_id", parentDoc.getProjectId(),
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId() : "",
"project_id", parentDoc.getProjectId() != null ? parentDoc.getProjectId().toString() : "",
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId().toString() : "",
"chunk_index", i,
"chunk_total", chunks.size(),
"title", parentDoc.getTitle(),
"source_type", parentDoc.getSourceType(),
"chunk_parent_id", parentId,
"title", parentDoc.getTitle() != null ? parentDoc.getTitle() : "",
"source_type", parentDoc.getSourceType() != null ? parentDoc.getSourceType() : "",
"status", "active"
)
);
@@ -199,7 +202,7 @@ public class DocumentProcessor {
documentMapper.updateById(parentDoc);
}
log.debug("存储切片: {}/{}, docId: {}", i + 1, chunks.size(), docId);
log.debug("存储切片: {}/{}, parentId: {}, chunkId: {}", i + 1, chunks.size(), parentId, chunkId);
}
}