refactor(ai): 合并ai_document表到vector_store表,切换文档ID类型为字符串
- 删除旧的ai_document表及相关索引,新增vector_store表兼容Spring AI PgVectorStore结构 - 调整实体类AiDocument映射到vector_store表,使用字符串ID代替Long类型 - 修改Mapper接口及XML中所有ID相关SQL使用字符串类型,并替换表名为vector_store - 修改服务接口与实现类,文档ID参数类型统一为字符串 - 处理文档分块时改用UUID生成chunk ID,确保唯一且格式正确 - 禁用Spring Ai PgVectorStore的自动schema初始化,使用手动创建的表结构 - 更新配置文件OpenAI模型API key及基础URL配置,支持多模型与聊天功能 - 优化日志输出,增加分块文档ID和父文档ID显示,方便调试追踪
This commit is contained in:
@@ -8,6 +8,122 @@
|
||||
-- pgvector: supplies the vector column type and the ivfflat index method used by vector_store.
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
-- uuid-ossp: supplies uuid_generate_v4(). NOTE(review): confirm something in this script still calls it.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||
|
||||
-- =====================================================
|
||||
-- Vector storage table for Spring AI PgVectorStore
|
||||
-- Stores RAG document embeddings for vector storage and similarity search
|
||||
-- Keeps the default PgVectorStore columns and adds full document-management columns
-- NOTE(review): the FOREIGN KEY constraints below reference project, project_timeline,
--   ai_knowledge_base and sys_user. If those tables are created later in this script,
--   this CREATE TABLE fails on a fresh database - confirm the statement ordering.
-- NOTE(review): DROP ... CASCADE removes the table and every stored embedding each time the script runs.
|
||||
-- =====================================================
|
||||
DROP TABLE IF EXISTS vector_store CASCADE;
|
||||
CREATE TABLE vector_store (
|
||||
-- Core PgVectorStore columns
|
||||
-- NOTE(review): PgVectorStore's default schema uses a uuid id; confirm the store is configured for a text id.
id VARCHAR(255) PRIMARY KEY,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
-- NOTE(review): 1536 must equal the output dimension of the configured embedding model - verify.
embedding vector(1536),
|
||||
|
||||
-- Associations
|
||||
project_id BIGINT,
|
||||
timeline_node_id BIGINT,
|
||||
kb_id BIGINT,
|
||||
|
||||
-- Document source
|
||||
source_type VARCHAR(50),
|
||||
source_id BIGINT,
|
||||
|
||||
-- Extended document information
|
||||
title VARCHAR(500),
|
||||
content_raw TEXT,
|
||||
summary TEXT,
|
||||
|
||||
-- Document metadata
|
||||
doc_type VARCHAR(50),
|
||||
language VARCHAR(10) DEFAULT 'zh',
|
||||
file_type VARCHAR(50),
|
||||
file_size BIGINT,
|
||||
file_path VARCHAR(500),
|
||||
|
||||
-- Time information (for time-based retrieval)
|
||||
doc_date DATE,
|
||||
doc_datetime TIMESTAMP,
|
||||
|
||||
-- Chunk information (large documents are stored as chunks)
|
||||
chunk_index INT DEFAULT 0,
|
||||
chunk_total INT DEFAULT 1,
|
||||
-- id of the parent document row; there is no foreign key on this column
chunk_parent_id VARCHAR(255),
|
||||
|
||||
-- Tags and category
|
||||
tags JSONB,
|
||||
category VARCHAR(100),
|
||||
|
||||
-- Usage statistics
|
||||
view_count INT DEFAULT 0,
|
||||
query_count INT DEFAULT 0,
|
||||
last_queried_at TIMESTAMP,
|
||||
|
||||
-- Status
|
||||
status VARCHAR(20) DEFAULT 'active',
|
||||
error_message TEXT,
|
||||
|
||||
-- Creation / audit information
|
||||
create_by BIGINT,
|
||||
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
update_by BIGINT,
|
||||
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
-- soft-delete flag; the partial indexes on this table only cover rows with deleted = 0
deleted SMALLINT DEFAULT 0,
|
||||
|
||||
-- Foreign key constraints (the referencing column is set to NULL when the referenced row is deleted)
|
||||
CONSTRAINT fk_vs_project FOREIGN KEY (project_id) REFERENCES project(id) ON DELETE SET NULL,
|
||||
CONSTRAINT fk_vs_timeline FOREIGN KEY (timeline_node_id) REFERENCES project_timeline(id) ON DELETE SET NULL,
|
||||
CONSTRAINT fk_vs_kb FOREIGN KEY (kb_id) REFERENCES ai_knowledge_base(id) ON DELETE SET NULL,
|
||||
CONSTRAINT fk_vs_create_by FOREIGN KEY (create_by) REFERENCES sys_user(id) ON DELETE SET NULL
|
||||
);
|
||||
|
||||
-- Vector index (IVFFlat algorithm, suited to medium-sized data sets), using the cosine operator class
-- NOTE(review): pgvector advises building IVFFlat indexes after the table holds data; the table is
--   empty at this point - consider reindexing after the initial load, or an hnsw index instead.
|
||||
CREATE INDEX idx_vector_store_embedding ON vector_store
|
||||
USING ivfflat (embedding vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Other commonly used indexes
-- The first four are partial indexes that only cover rows with deleted = 0.
|
||||
CREATE INDEX idx_vs_project ON vector_store(project_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_vs_timeline ON vector_store(timeline_node_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_vs_kb ON vector_store(kb_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_vs_source ON vector_store(source_type, source_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_vs_status ON vector_store(status);
|
||||
CREATE INDEX idx_vs_type ON vector_store(doc_type);
|
||||
-- GIN index over the JSONB tags column
CREATE INDEX idx_vs_tags ON vector_store USING GIN(tags);
|
||||
-- Partial index covering only rows that have a parent document
CREATE INDEX idx_vs_chunk_parent ON vector_store(chunk_parent_id) WHERE chunk_parent_id IS NOT NULL;
|
||||
|
||||
-- Catalog comments for vector_store and its columns. The comment strings are stored in the database
-- and are kept verbatim. create_by, create_time, update_by, update_time and deleted get no comment here.
COMMENT ON TABLE vector_store IS '向量存储表 - 用于RAG文档检索和知识库管理';
|
||||
COMMENT ON COLUMN vector_store.id IS '文档ID(字符串类型,兼容PgVectorStore)';
|
||||
COMMENT ON COLUMN vector_store.content IS '文档内容文本';
|
||||
COMMENT ON COLUMN vector_store.metadata IS '文档元数据(JSONB格式,兼容PgVectorStore)';
|
||||
COMMENT ON COLUMN vector_store.embedding IS '向量嵌入(1536维)';
|
||||
COMMENT ON COLUMN vector_store.project_id IS '关联项目ID';
|
||||
COMMENT ON COLUMN vector_store.timeline_node_id IS '关联时间节点ID';
|
||||
COMMENT ON COLUMN vector_store.kb_id IS '关联知识库ID';
|
||||
COMMENT ON COLUMN vector_store.source_type IS '来源类型: project-项目文档, risk-风险文档, ticket-工单, report-日报, upload-上传文件, knowledge-知识库, chat-对话记录';
|
||||
COMMENT ON COLUMN vector_store.source_id IS '来源记录ID';
|
||||
COMMENT ON COLUMN vector_store.title IS '文档标题';
|
||||
COMMENT ON COLUMN vector_store.content_raw IS '原始内容(带格式)';
|
||||
COMMENT ON COLUMN vector_store.summary IS 'AI生成的摘要';
|
||||
COMMENT ON COLUMN vector_store.doc_type IS '文档类型: requirement-需求, design-设计, plan-计划, report-报告, contract-合同, photo-照片, other-其他';
|
||||
COMMENT ON COLUMN vector_store.language IS '语言: zh-中文, en-英文';
|
||||
COMMENT ON COLUMN vector_store.file_type IS '文件类型: pdf, doc, txt, md, jpg, png等';
|
||||
COMMENT ON COLUMN vector_store.file_size IS '文件大小(字节)';
|
||||
COMMENT ON COLUMN vector_store.file_path IS '文件存储路径';
|
||||
COMMENT ON COLUMN vector_store.doc_date IS '文档日期(如日报日期、照片拍摄日期)';
|
||||
COMMENT ON COLUMN vector_store.doc_datetime IS '文档时间戳';
|
||||
COMMENT ON COLUMN vector_store.chunk_index IS '分块序号';
|
||||
COMMENT ON COLUMN vector_store.chunk_total IS '总分块数';
|
||||
COMMENT ON COLUMN vector_store.chunk_parent_id IS '父文档ID(分块时使用)';
|
||||
COMMENT ON COLUMN vector_store.tags IS '标签数组';
|
||||
COMMENT ON COLUMN vector_store.category IS '分类';
|
||||
COMMENT ON COLUMN vector_store.view_count IS '查看次数';
|
||||
COMMENT ON COLUMN vector_store.query_count IS '被检索次数';
|
||||
COMMENT ON COLUMN vector_store.last_queried_at IS '最后被检索时间';
|
||||
COMMENT ON COLUMN vector_store.status IS '状态: active-可用, processing-处理中, error-错误, archived-归档';
|
||||
COMMENT ON COLUMN vector_store.error_message IS '错误信息';
|
||||
|
||||
-- Set the time zone for the session that runs this script
|
||||
SET timezone = 'Asia/Shanghai';
|
||||
|
||||
@@ -969,118 +1085,8 @@ COMMENT ON COLUMN ai_knowledge_base.status IS '状态: active-可用, archived-
|
||||
-- 11. AI服务相关表
|
||||
-- =====================================================
|
||||
|
||||
-- AI文档向量表 (用于RAG知识库)
|
||||
DROP TABLE IF EXISTS ai_document CASCADE;
|
||||
CREATE TABLE ai_document (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
doc_id UUID DEFAULT uuid_generate_v4(),
|
||||
|
||||
-- 关联关系
|
||||
project_id BIGINT,
|
||||
timeline_node_id BIGINT,
|
||||
kb_id BIGINT,
|
||||
-- 文档来源
|
||||
source_type VARCHAR(50) NOT NULL,
|
||||
source_id BIGINT,
|
||||
|
||||
-- 文档内容
|
||||
title VARCHAR(500),
|
||||
content TEXT NOT NULL,
|
||||
content_raw TEXT,
|
||||
summary TEXT,
|
||||
|
||||
-- 向量嵌入 (1536维适配OpenAI, 可调整为其他维度)
|
||||
embedding vector(1536),
|
||||
|
||||
-- 文档元数据
|
||||
doc_type VARCHAR(50),
|
||||
language VARCHAR(10) DEFAULT 'zh',
|
||||
file_type VARCHAR(50),
|
||||
file_size BIGINT,
|
||||
file_path VARCHAR(500),
|
||||
|
||||
-- 时间信息 (用于时间维度检索)
|
||||
doc_date DATE,
|
||||
doc_datetime TIMESTAMP,
|
||||
|
||||
-- 分块信息(大文档分块存储)
|
||||
chunk_index INT DEFAULT 0,
|
||||
chunk_total INT DEFAULT 1,
|
||||
chunk_parent_id BIGINT,
|
||||
|
||||
-- 标签和分类
|
||||
tags JSONB,
|
||||
category VARCHAR(100),
|
||||
|
||||
-- 使用统计
|
||||
view_count INT DEFAULT 0,
|
||||
query_count INT DEFAULT 0,
|
||||
last_queried_at TIMESTAMP,
|
||||
|
||||
-- 状态
|
||||
status VARCHAR(20) DEFAULT 'active',
|
||||
error_message TEXT,
|
||||
|
||||
-- 创建信息
|
||||
create_by BIGINT,
|
||||
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
update_by BIGINT,
|
||||
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
deleted SMALLINT DEFAULT 0,
|
||||
|
||||
-- 外键约束
|
||||
CONSTRAINT fk_ai_doc_project FOREIGN KEY (project_id) REFERENCES project(id) ON DELETE SET NULL,
|
||||
CONSTRAINT fk_ai_doc_timeline FOREIGN KEY (timeline_node_id) REFERENCES project_timeline(id) ON DELETE SET NULL,
|
||||
CONSTRAINT fk_ai_doc_kb FOREIGN KEY (kb_id) REFERENCES ai_knowledge_base(id) ON DELETE SET NULL
|
||||
);
|
||||
|
||||
-- 创建向量索引 (使用IVFFlat或HNSW)
|
||||
-- IVFFlat: 适合中等数据量, 内存占用小
|
||||
-- HNSW: 适合大数据量, 查询更快但内存占用大
|
||||
CREATE INDEX idx_ai_document_embedding ON ai_document
|
||||
USING ivfflat (embedding vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- 创建其他常用索引
|
||||
CREATE INDEX idx_ai_doc_project ON ai_document(project_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_ai_doc_timeline ON ai_document(timeline_node_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_ai_doc_kb ON ai_document(kb_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_ai_doc_source ON ai_document(source_type, source_id) WHERE deleted = 0;
|
||||
CREATE INDEX idx_ai_doc_status ON ai_document(status);
|
||||
CREATE INDEX idx_ai_doc_type ON ai_document(doc_type);
|
||||
CREATE INDEX idx_ai_doc_tags ON ai_document USING GIN(tags);
|
||||
|
||||
COMMENT ON TABLE ai_document IS 'AI文档向量表 - 存储所有用于RAG的文档向量';
|
||||
COMMENT ON COLUMN ai_document.doc_id IS '文档唯一标识';
|
||||
COMMENT ON COLUMN ai_document.project_id IS '关联项目ID';
|
||||
COMMENT ON COLUMN ai_document.timeline_node_id IS '关联时间节点ID';
|
||||
COMMENT ON COLUMN ai_document.kb_id IS '关联知识库ID';
|
||||
COMMENT ON COLUMN ai_document.source_type IS '来源类型: project-项目文档, risk-风险文档, ticket-工单, report-日报, upload-上传文件, knowledge-知识库, chat-对话记录';
|
||||
COMMENT ON COLUMN ai_document.source_id IS '来源记录ID';
|
||||
COMMENT ON COLUMN ai_document.title IS '文档标题';
|
||||
COMMENT ON COLUMN ai_document.content IS '文档内容(纯文本)';
|
||||
COMMENT ON COLUMN ai_document.content_raw IS '原始内容(带格式)';
|
||||
COMMENT ON COLUMN ai_document.summary IS 'AI生成的摘要';
|
||||
COMMENT ON COLUMN ai_document.embedding IS '向量嵌入';
|
||||
COMMENT ON COLUMN ai_document.doc_type IS '文档类型: requirement-需求, design-设计, plan-计划, report-报告, contract-合同, photo-照片, other-其他';
|
||||
COMMENT ON COLUMN ai_document.language IS '语言: zh-中文, en-英文';
|
||||
COMMENT ON COLUMN ai_document.file_type IS '文件类型: pdf, doc, txt, md, jpg, png等';
|
||||
COMMENT ON COLUMN ai_document.file_size IS '文件大小(字节)';
|
||||
COMMENT ON COLUMN ai_document.file_path IS '文件存储路径';
|
||||
COMMENT ON COLUMN ai_document.doc_date IS '文档日期(如日报日期、照片拍摄日期)';
|
||||
COMMENT ON COLUMN ai_document.doc_datetime IS '文档时间戳';
|
||||
COMMENT ON COLUMN ai_document.chunk_index IS '分块序号';
|
||||
COMMENT ON COLUMN ai_document.chunk_total IS '总分块数';
|
||||
COMMENT ON COLUMN ai_document.chunk_parent_id IS '父文档ID(分块时使用)';
|
||||
COMMENT ON COLUMN ai_document.tags IS '标签数组';
|
||||
COMMENT ON COLUMN ai_document.category IS '分类';
|
||||
COMMENT ON COLUMN ai_document.view_count IS '查看次数';
|
||||
COMMENT ON COLUMN ai_document.query_count IS '被检索次数';
|
||||
COMMENT ON COLUMN ai_document.last_queried_at IS '最后被检索时间';
|
||||
COMMENT ON COLUMN ai_document.status IS '状态: active-可用, processing-处理中, error-错误, archived-归档';
|
||||
COMMENT ON COLUMN ai_document.error_message IS '错误信息';
|
||||
|
||||
-- AI对话记录表 (合并会话管理功能,无需单独的session表)
|
||||
-- 注:ai_document表已合并到vector_store表中
|
||||
DROP TABLE IF EXISTS ai_chat_history CASCADE;
|
||||
CREATE TABLE ai_chat_history (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
|
||||
@@ -41,7 +41,7 @@ public class SpringAiConfig {
|
||||
return PgVectorStore.builder(jdbcTemplate, embeddingModel)
|
||||
.dimensions(1536) // 向量维度,与配置一致
|
||||
.distanceType(PgVectorStore.PgDistanceType.COSINE_DISTANCE)
|
||||
.initializeSchema(true) // 自动初始化schema
|
||||
.initializeSchema(false) // 禁用自动初始化,使用SQL文件中已创建的表
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,20 +9,20 @@ import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* AI文档向量实体
|
||||
* 对应 ai_document 表
|
||||
* 向量存储实体
|
||||
* 对应 vector_store 表
|
||||
* 兼容 Spring AI PgVectorStore 默认结构
|
||||
*/
|
||||
@Data
|
||||
@TableName("ai_document")
|
||||
@TableName("vector_store")
|
||||
public class AiDocument {
|
||||
|
||||
@TableId(type = IdType.AUTO)
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 文档唯一标识(UUID)
|
||||
* 文档ID(字符串类型,兼容PgVectorStore)
|
||||
* 使用标准UUID格式(带连字符)
|
||||
*/
|
||||
private String docId;
|
||||
@TableId(type = IdType.INPUT)
|
||||
private String id;
|
||||
|
||||
/**
|
||||
* 关联项目ID
|
||||
@@ -124,7 +124,7 @@ public class AiDocument {
|
||||
/**
|
||||
* 父文档ID(分块时使用)
|
||||
*/
|
||||
private Long chunkParentId;
|
||||
private String chunkParentId;
|
||||
|
||||
/**
|
||||
* 标签数组(JSON)
|
||||
|
||||
@@ -13,10 +13,10 @@ public class KbDocumentVO {
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
private Long id;
|
||||
private String id;
|
||||
|
||||
/**
|
||||
* 文档UUID
|
||||
* 文档UUID(与id相同)
|
||||
*/
|
||||
private String docId;
|
||||
|
||||
|
||||
@@ -11,10 +11,10 @@ public class ReferencedDocVO {
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
private Long id;
|
||||
private String id;
|
||||
|
||||
/**
|
||||
* 文档UUID
|
||||
* 文档UUID(与id相同)
|
||||
*/
|
||||
private String docId;
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import org.apache.ibatis.annotations.Param;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* AI文档向量Mapper
|
||||
* 向量存储Mapper
|
||||
*/
|
||||
@Mapper
|
||||
public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
@@ -24,17 +24,17 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
List<KbDocumentVO> selectProjectDocuments(@Param("projectId") Long projectId);
|
||||
|
||||
/**
|
||||
* 根据docId查询文档
|
||||
* 根据id查询文档
|
||||
*
|
||||
* @param docId 文档UUID
|
||||
* @param docId 文档ID
|
||||
* @return 文档实体
|
||||
*/
|
||||
AiDocument selectByDocId(@Param("docId") String docId);
|
||||
|
||||
/**
|
||||
* 根据docId删除文档
|
||||
* 根据id删除文档
|
||||
*
|
||||
* @param docId 文档UUID
|
||||
* @param docId 文档ID
|
||||
* @return 影响行数
|
||||
*/
|
||||
int deleteByDocId(@Param("docId") String docId);
|
||||
@@ -45,7 +45,7 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
* @param docIds 文档ID列表
|
||||
* @return 文档信息列表
|
||||
*/
|
||||
List<ReferencedDocVO> selectReferencedDocs(@Param("docIds") List<Long> docIds);
|
||||
List<ReferencedDocVO> selectReferencedDocs(@Param("docIds") List<String> docIds);
|
||||
|
||||
/**
|
||||
* 获取父文档的分块数量
|
||||
@@ -53,12 +53,12 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
* @param docId 父文档ID
|
||||
* @return 分块数量
|
||||
*/
|
||||
Integer selectChunkCount(@Param("docId") Long docId);
|
||||
Integer selectChunkCount(@Param("docId") String docId);
|
||||
|
||||
/**
|
||||
* 更新文档状态
|
||||
*
|
||||
* @param docId 文档UUID
|
||||
* @param docId 文档ID
|
||||
* @param status 状态
|
||||
* @return 影响行数
|
||||
*/
|
||||
@@ -67,7 +67,7 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
/**
|
||||
* 更新文档错误信息
|
||||
*
|
||||
* @param docId 文档UUID
|
||||
* @param docId 文档ID
|
||||
* @param errorMessage 错误信息
|
||||
* @return 影响行数
|
||||
*/
|
||||
@@ -79,7 +79,7 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
* @param id 文档ID
|
||||
* @return 影响行数
|
||||
*/
|
||||
int incrementViewCount(@Param("id") Long id);
|
||||
int incrementViewCount(@Param("id") String id);
|
||||
|
||||
/**
|
||||
* 增加文档查询次数
|
||||
@@ -87,5 +87,5 @@ public interface AiDocumentMapper extends BaseMapper<AiDocument> {
|
||||
* @param id 文档ID
|
||||
* @return 影响行数
|
||||
*/
|
||||
int incrementQueryCount(@Param("id") Long id);
|
||||
int incrementQueryCount(@Param("id") String id);
|
||||
}
|
||||
|
||||
@@ -49,12 +49,12 @@ public interface AiKnowledgeBaseService {
|
||||
*
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
void processDocument(Long docId);
|
||||
void processDocument(String docId);
|
||||
|
||||
/**
|
||||
* 异步处理文档
|
||||
*
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
void processDocumentAsync(Long docId);
|
||||
void processDocumentAsync(String docId);
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
|
||||
// 4. 保存文档元数据
|
||||
AiDocument doc = new AiDocument();
|
||||
doc.setDocId(docId);
|
||||
doc.setId(docId); // 设置标准UUID格式的ID
|
||||
doc.setProjectId(projectId);
|
||||
doc.setSourceType("upload");
|
||||
doc.setTitle(originalFilename);
|
||||
@@ -75,7 +75,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
// 5. 异步处理文档(解析、切片、向量化)
|
||||
documentProcessor.processDocumentAsync(doc.getId());
|
||||
|
||||
log.info("文件上传成功: {}, docId: {}", originalFilename, docId);
|
||||
log.info("文件上传成功: {}, docId: {}", originalFilename, doc.getId());
|
||||
|
||||
// 6. 返回VO
|
||||
return convertToVO(doc);
|
||||
@@ -133,13 +133,13 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processDocument(Long docId) {
|
||||
public void processDocument(String docId) {
|
||||
documentProcessor.processDocument(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
@Async
|
||||
public void processDocumentAsync(Long docId) {
|
||||
public void processDocumentAsync(String docId) {
|
||||
documentProcessor.processDocument(docId);
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
private KbDocumentVO convertToVO(AiDocument doc) {
|
||||
KbDocumentVO vo = new KbDocumentVO();
|
||||
vo.setId(doc.getId());
|
||||
vo.setDocId(doc.getDocId());
|
||||
vo.setDocId(doc.getId());
|
||||
vo.setTitle(doc.getTitle());
|
||||
vo.setDocType(doc.getDocType());
|
||||
vo.setFileType(doc.getFileType());
|
||||
|
||||
@@ -16,6 +16,7 @@ import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
@@ -41,7 +42,7 @@ public class DocumentProcessor {
|
||||
*
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
public void processDocument(Long docId) {
|
||||
public void processDocument(String docId) {
|
||||
AiDocument doc = documentMapper.selectById(docId);
|
||||
if (doc == null) {
|
||||
log.error("文档不存在: {}", docId);
|
||||
@@ -91,7 +92,7 @@ public class DocumentProcessor {
|
||||
* @param docId 文档ID
|
||||
*/
|
||||
@Async("documentTaskExecutor")
|
||||
public void processDocumentAsync(Long docId) {
|
||||
public void processDocumentAsync(String docId) {
|
||||
processDocument(docId);
|
||||
}
|
||||
|
||||
@@ -169,23 +170,25 @@ public class DocumentProcessor {
|
||||
* @param chunks 切片列表
|
||||
*/
|
||||
private void storeChunks(AiDocument parentDoc, List<String> chunks) {
|
||||
String docId = parentDoc.getDocId();
|
||||
Long parentId = parentDoc.getId();
|
||||
String parentId = parentDoc.getId();
|
||||
|
||||
for (int i = 0; i < chunks.size(); i++) {
|
||||
String chunkContent = chunks.get(i);
|
||||
// 使用UUID生成唯一的chunk ID,确保格式正确
|
||||
String chunkId = UUID.randomUUID().toString();
|
||||
|
||||
// 创建向量文档
|
||||
Document vectorDoc = new Document(
|
||||
chunkId,
|
||||
chunkContent,
|
||||
Map.of(
|
||||
"doc_id", docId.toString(),
|
||||
"project_id", parentDoc.getProjectId(),
|
||||
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId() : "",
|
||||
"project_id", parentDoc.getProjectId() != null ? parentDoc.getProjectId().toString() : "",
|
||||
"timeline_node_id", parentDoc.getTimelineNodeId() != null ? parentDoc.getTimelineNodeId().toString() : "",
|
||||
"chunk_index", i,
|
||||
"chunk_total", chunks.size(),
|
||||
"title", parentDoc.getTitle(),
|
||||
"source_type", parentDoc.getSourceType(),
|
||||
"chunk_parent_id", parentId,
|
||||
"title", parentDoc.getTitle() != null ? parentDoc.getTitle() : "",
|
||||
"source_type", parentDoc.getSourceType() != null ? parentDoc.getSourceType() : "",
|
||||
"status", "active"
|
||||
)
|
||||
);
|
||||
@@ -199,7 +202,7 @@ public class DocumentProcessor {
|
||||
documentMapper.updateById(parentDoc);
|
||||
}
|
||||
|
||||
log.debug("存储切片: {}/{}, docId: {}", i + 1, chunks.size(), docId);
|
||||
log.debug("存储切片: {}/{}, parentId: {}, chunkId: {}", i + 1, chunks.size(), parentId, chunkId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -46,14 +46,21 @@ spring:
|
||||
|
||||
ai:
|
||||
openai:
|
||||
api-key: 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
|
||||
base-url: https://sg1.proxy.yinlihupo.cc/proxy/https://openrouter.ai/api
|
||||
##嵌入式模型
|
||||
embedding:
|
||||
options:
|
||||
model: qwen/qwen3-embedding-8b
|
||||
model: text-embedding-v4
|
||||
base-url: https://dashscope.aliyuncs.com/compatible-mode
|
||||
api-key: sk-85c3ccc7c63747c485f9699c90f1972f
|
||||
##聊天模型
|
||||
chat:
|
||||
options:
|
||||
model: google/gemini-3.1-pro-preview
|
||||
api-key: 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
|
||||
base-url: https://sg1.proxy.yinlihupo.cc/proxy/https://openrouter.ai/api
|
||||
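## API key / base URL for the other models (presumably the spring.ai.openai-level defaults - confirm). NOTE(review): the API keys in this section are committed in plaintext; rotate them and supply them via environment variables or a secrets store.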
|
||||
base-url: https://sg1.proxy.yinlihupo.cc/proxy/https://openrouter.ai/api
|
||||
api-key: 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
|
||||
|
||||
# MinIO 对象存储配置
|
||||
minio:
|
||||
|
||||
@@ -5,54 +5,52 @@
|
||||
<!-- 获取项目文档列表 -->
|
||||
<select id="selectProjectDocuments" resultType="cn.yinlihupo.domain.vo.KbDocumentVO">
|
||||
SELECT
|
||||
ad.id,
|
||||
ad.doc_id as docId,
|
||||
ad.title,
|
||||
ad.doc_type as docType,
|
||||
ad.file_type as fileType,
|
||||
ad.file_size as fileSize,
|
||||
ad.file_path as filePath,
|
||||
ad.source_type as sourceType,
|
||||
ad.chunk_total as chunkCount,
|
||||
ad.status,
|
||||
vs.id,
|
||||
vs.title,
|
||||
vs.doc_type as docType,
|
||||
vs.file_type as fileType,
|
||||
vs.file_size as fileSize,
|
||||
vs.file_path as filePath,
|
||||
vs.source_type as sourceType,
|
||||
vs.chunk_total as chunkCount,
|
||||
vs.status,
|
||||
su.real_name as createByName,
|
||||
ad.create_time as createTime
|
||||
FROM ai_document ad
|
||||
LEFT JOIN sys_user su ON ad.create_by = su.id
|
||||
WHERE ad.project_id = #{projectId}
|
||||
AND ad.deleted = 0
|
||||
AND ad.chunk_parent_id IS NULL
|
||||
ORDER BY ad.create_time DESC
|
||||
vs.create_time as createTime
|
||||
FROM vector_store vs
|
||||
LEFT JOIN sys_user su ON vs.create_by = su.id
|
||||
WHERE vs.project_id = #{projectId}
|
||||
AND vs.deleted = 0
|
||||
AND vs.chunk_parent_id IS NULL
|
||||
ORDER BY vs.create_time DESC
|
||||
</select>
|
||||
|
||||
<!-- 根据docId查询文档 -->
|
||||
<!-- 根据id查询文档 -->
|
||||
<select id="selectByDocId" resultType="cn.yinlihupo.domain.entity.AiDocument">
|
||||
SELECT *
|
||||
FROM ai_document
|
||||
WHERE doc_id = #{docId}
|
||||
FROM vector_store
|
||||
WHERE id = #{docId}
|
||||
AND deleted = 0
|
||||
LIMIT 1
|
||||
</select>
|
||||
|
||||
<!-- 根据docId删除文档 -->
|
||||
<!-- 根据id删除文档 -->
|
||||
<delete id="deleteByDocId">
|
||||
UPDATE ai_document
|
||||
UPDATE vector_store
|
||||
SET deleted = 1,
|
||||
update_time = NOW()
|
||||
WHERE doc_id = #{docId}
|
||||
WHERE id = #{docId}
|
||||
</delete>
|
||||
|
||||
<!-- 批量查询引用文档信息 -->
|
||||
<select id="selectReferencedDocs" resultType="cn.yinlihupo.domain.vo.ReferencedDocVO">
|
||||
SELECT
|
||||
id,
|
||||
doc_id as docId,
|
||||
title,
|
||||
doc_type as docType,
|
||||
file_type as fileType,
|
||||
source_type as sourceType,
|
||||
LEFT(content, 200) as content
|
||||
FROM ai_document
|
||||
FROM vector_store
|
||||
WHERE id IN
|
||||
<foreach collection="docIds" item="id" open="(" separator="," close=")">
|
||||
#{id}
|
||||
@@ -63,38 +61,38 @@
|
||||
<!-- 获取父文档的分块数量 -->
|
||||
<select id="selectChunkCount" resultType="java.lang.Integer">
|
||||
SELECT COUNT(*)
|
||||
FROM ai_document
|
||||
FROM vector_store
|
||||
WHERE chunk_parent_id = #{docId}
|
||||
AND deleted = 0
|
||||
</select>
|
||||
|
||||
<!-- 更新文档状态 -->
|
||||
<update id="updateStatus">
|
||||
UPDATE ai_document
|
||||
UPDATE vector_store
|
||||
SET status = #{status},
|
||||
update_time = NOW()
|
||||
WHERE doc_id = #{docId}
|
||||
WHERE id = #{docId}
|
||||
</update>
|
||||
|
||||
<!-- 更新文档错误信息 -->
|
||||
<update id="updateErrorMessage">
|
||||
UPDATE ai_document
|
||||
UPDATE vector_store
|
||||
SET error_message = #{errorMessage},
|
||||
status = 'error',
|
||||
update_time = NOW()
|
||||
WHERE doc_id = #{docId}
|
||||
WHERE id = #{docId}
|
||||
</update>
|
||||
|
||||
<!-- 增加文档查看次数 -->
|
||||
<update id="incrementViewCount">
|
||||
UPDATE ai_document
|
||||
UPDATE vector_store
|
||||
SET view_count = view_count + 1
|
||||
WHERE id = #{id}
|
||||
</update>
|
||||
|
||||
<!-- 增加文档查询次数 -->
|
||||
<update id="incrementQueryCount">
|
||||
UPDATE ai_document
|
||||
UPDATE vector_store
|
||||
SET query_count = query_count + 1,
|
||||
last_queried_at = NOW()
|
||||
WHERE id = #{id}
|
||||
|
||||
Reference in New Issue
Block a user