Files
ylhp-ai-project-manager/src/main/java/cn/yinlihupo/service/ai/impl/AiKnowledgeBaseServiceImpl.java
JiaoTianBo 9f972f5e30 feat(ai-knowledge-base): 实现AI知识库文档上传与管理功能
- 新增AiDocument实体类,映射数据库ai_document表结构
- 添加AiDocumentMapper接口,提供文档增删改查及状态更新等数据库操作
- 实现AiKnowledgeBaseService接口及其实现类AiKnowledgeBaseServiceImpl,支持文件上传、文档列表查询、删除和重新索引
- 在AiKnowledgeBaseController中提供REST接口支持文件上传、文档管理和异步重新索引操作
- 实现DocumentProcessor组件,负责文档解析、文本切片、摘要生成和向量化存储
- 集成MinioService实现文件的上传、下载和删除操作
- 设计KbDocumentVO作为知识库文档视图对象,方便接口数据传输和展示
- 增加文件类型支持和上传文件校验,限制最大50MB文件大小
- 使用异步机制处理文档解析和向量化,提高系统处理性能和响应速度
- 实现文档状态管理和错误处理,确保文档处理流程的正确性和稳定性
2026-03-30 16:49:07 +08:00

213 lines
6.7 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package cn.yinlihupo.service.ai.impl;
import cn.yinlihupo.domain.entity.AiDocument;
import cn.yinlihupo.domain.vo.KbDocumentVO;
import cn.yinlihupo.mapper.AiDocumentMapper;
import cn.yinlihupo.service.ai.AiKnowledgeBaseService;
import cn.yinlihupo.service.ai.rag.DocumentProcessor;
import cn.yinlihupo.service.oss.MinioService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Locale;
import java.util.UUID;
/**
* AI知识库服务实现
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
private final AiDocumentMapper documentMapper;
private final DocumentProcessor documentProcessor;
private final MinioService minioService;
// 支持的文件类型
private static final List<String> SUPPORTED_TYPES = List.of(
"pdf", "doc", "docx", "txt", "md", "json", "csv"
);
@Override
public KbDocumentVO uploadFile(Long projectId, MultipartFile file, Long userId) {
// 1. 验证文件
validateFile(file);
// 2. 生成文档UUID
String docId = UUID.randomUUID().toString();
// 3. 上传文件到MinIO
String originalFilename = file.getOriginalFilename();
String fileExtension = getFileExtension(originalFilename);
String filePath = String.format("kb/%d/%s.%s", projectId, docId, fileExtension);
try {
minioService.uploadFile(filePath, file.getInputStream(), file.getContentType());
} catch (Exception e) {
log.error("上传文件到MinIO失败: {}", e.getMessage(), e);
throw new RuntimeException("文件上传失败: " + e.getMessage());
}
// 4. 保存文档元数据
AiDocument doc = new AiDocument();
doc.setDocId(docId);
doc.setProjectId(projectId);
doc.setSourceType("upload");
doc.setTitle(originalFilename);
doc.setDocType(detectDocType(fileExtension));
doc.setFileType(fileExtension);
doc.setFileSize(file.getSize());
doc.setFilePath(filePath);
doc.setContent("");
doc.setStatus("pending"); // 待处理状态
doc.setChunkTotal(0);
doc.setCreateBy(userId);
doc.setCreateTime(LocalDateTime.now());
doc.setDeleted(0);
documentMapper.insert(doc);
// 5. 异步处理文档(解析、切片、向量化)
documentProcessor.processDocumentAsync(doc.getId());
log.info("文件上传成功: {}, docId: {}", originalFilename, docId);
// 6. 返回VO
return convertToVO(doc);
}
@Override
public List<KbDocumentVO> getProjectDocuments(Long projectId) {
return documentMapper.selectProjectDocuments(projectId);
}
@Override
public void deleteDocument(String docId, Long userId) {
// 1. 查询文档
AiDocument doc = documentMapper.selectByDocId(docId);
if (doc == null) {
throw new RuntimeException("文档不存在");
}
// 2. 删除MinIO中的文件
try {
minioService.deleteFile(doc.getFilePath());
} catch (Exception e) {
log.error("删除MinIO文件失败: {}, 错误: {}", doc.getFilePath(), e.getMessage());
// 继续删除数据库记录
}
// 3. 删除向量库中的向量(简化处理,实际可能需要更复杂的逻辑)
documentProcessor.deleteDocumentVectors(docId);
// 4. 删除数据库记录
documentMapper.deleteByDocId(docId);
log.info("文档删除成功: {}, userId: {}", docId, userId);
}
@Override
public void reindexDocument(String docId, Long userId) {
// 1. 查询文档
AiDocument doc = documentMapper.selectByDocId(docId);
if (doc == null) {
throw new RuntimeException("文档不存在");
}
// 2. 更新状态为处理中
doc.setStatus("processing");
documentMapper.updateById(doc);
// 3. 删除旧的向量
documentProcessor.deleteDocumentVectors(docId);
// 4. 重新处理
documentProcessor.processDocumentAsync(doc.getId());
log.info("文档重新索引: {}, userId: {}", docId, userId);
}
@Override
public void processDocument(Long docId) {
documentProcessor.processDocument(docId);
}
@Override
@Async
public void processDocumentAsync(Long docId) {
documentProcessor.processDocument(docId);
}
/**
* 验证文件
*/
private void validateFile(MultipartFile file) {
if (file == null || file.isEmpty()) {
throw new RuntimeException("文件不能为空");
}
String filename = file.getOriginalFilename();
if (filename == null || filename.isEmpty()) {
throw new RuntimeException("文件名不能为空");
}
String extension = getFileExtension(filename);
if (!SUPPORTED_TYPES.contains(extension.toLowerCase())) {
throw new RuntimeException("不支持的文件类型: " + extension);
}
// 文件大小限制50MB
long maxSize = 50 * 1024 * 1024;
if (file.getSize() > maxSize) {
throw new RuntimeException("文件大小超过限制最大50MB");
}
}
/**
* 获取文件扩展名
*/
private String getFileExtension(String filename) {
if (filename == null || filename.lastIndexOf('.') == -1) {
return "";
}
return filename.substring(filename.lastIndexOf('.') + 1).toLowerCase();
}
/**
* 检测文档类型
*/
private String detectDocType(String extension) {
return switch (extension.toLowerCase()) {
case "pdf" -> "report";
case "doc", "docx" -> "document";
case "txt", "md" -> "text";
case "json", "csv" -> "data";
default -> "other";
};
}
/**
* 转换为VO
*/
private KbDocumentVO convertToVO(AiDocument doc) {
KbDocumentVO vo = new KbDocumentVO();
vo.setId(doc.getId());
vo.setDocId(doc.getDocId());
vo.setTitle(doc.getTitle());
vo.setDocType(doc.getDocType());
vo.setFileType(doc.getFileType());
vo.setFileSize(doc.getFileSize());
vo.setFilePath(doc.getFilePath());
vo.setSourceType(doc.getSourceType());
vo.setChunkCount(doc.getChunkTotal());
vo.setStatus(doc.getStatus());
vo.setCreateTime(doc.getCreateTime());
return vo;
}
}