- 新增AiDocument实体类,映射数据库ai_document表结构 - 添加AiDocumentMapper接口,提供文档增删改查及状态更新等数据库操作 - 实现AiKnowledgeBaseService接口及其实现类AiKnowledgeBaseServiceImpl,支持文件上传、文档列表查询、删除和重新索引 - 在AiKnowledgeBaseController中提供REST接口支持文件上传、文档管理和异步重新索引操作 - 实现DocumentProcessor组件,负责文档解析、文本切片、摘要生成和向量化存储 - 集成MinioService实现文件的上传、下载和删除操作 - 设计KbDocumentVO作为知识库文档视图对象,方便接口数据传输和展示 - 增加文件类型支持和上传文件校验,限制最大50MB文件大小 - 使用异步机制处理文档解析和向量化,提高系统处理性能和响应速度 - 实现文档状态管理和错误处理,确保文档处理流程的正确性和稳定性
213 lines
6.7 KiB
Java
213 lines
6.7 KiB
Java
package cn.yinlihupo.service.ai.impl;
|
||
|
||
import cn.yinlihupo.domain.entity.AiDocument;
|
||
import cn.yinlihupo.domain.vo.KbDocumentVO;
|
||
import cn.yinlihupo.mapper.AiDocumentMapper;
|
||
import cn.yinlihupo.service.ai.AiKnowledgeBaseService;
|
||
import cn.yinlihupo.service.ai.rag.DocumentProcessor;
|
||
import cn.yinlihupo.service.oss.MinioService;
|
||
import lombok.RequiredArgsConstructor;
|
||
import lombok.extern.slf4j.Slf4j;
|
||
import org.springframework.scheduling.annotation.Async;
|
||
import org.springframework.stereotype.Service;
|
||
import org.springframework.web.multipart.MultipartFile;
|
||
|
||
import java.time.LocalDateTime;
|
||
import java.util.List;
|
||
import java.util.UUID;
|
||
|
||
/**
|
||
* AI知识库服务实现
|
||
*/
|
||
@Slf4j
|
||
@Service
|
||
@RequiredArgsConstructor
|
||
public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||
|
||
private final AiDocumentMapper documentMapper;
|
||
private final DocumentProcessor documentProcessor;
|
||
private final MinioService minioService;
|
||
|
||
// 支持的文件类型
|
||
private static final List<String> SUPPORTED_TYPES = List.of(
|
||
"pdf", "doc", "docx", "txt", "md", "json", "csv"
|
||
);
|
||
|
||
@Override
|
||
public KbDocumentVO uploadFile(Long projectId, MultipartFile file, Long userId) {
|
||
// 1. 验证文件
|
||
validateFile(file);
|
||
|
||
// 2. 生成文档UUID
|
||
String docId = UUID.randomUUID().toString();
|
||
|
||
// 3. 上传文件到MinIO
|
||
String originalFilename = file.getOriginalFilename();
|
||
String fileExtension = getFileExtension(originalFilename);
|
||
String filePath = String.format("kb/%d/%s.%s", projectId, docId, fileExtension);
|
||
|
||
try {
|
||
minioService.uploadFile(filePath, file.getInputStream(), file.getContentType());
|
||
} catch (Exception e) {
|
||
log.error("上传文件到MinIO失败: {}", e.getMessage(), e);
|
||
throw new RuntimeException("文件上传失败: " + e.getMessage());
|
||
}
|
||
|
||
// 4. 保存文档元数据
|
||
AiDocument doc = new AiDocument();
|
||
doc.setDocId(docId);
|
||
doc.setProjectId(projectId);
|
||
doc.setSourceType("upload");
|
||
doc.setTitle(originalFilename);
|
||
doc.setDocType(detectDocType(fileExtension));
|
||
doc.setFileType(fileExtension);
|
||
doc.setFileSize(file.getSize());
|
||
doc.setFilePath(filePath);
|
||
doc.setContent("");
|
||
doc.setStatus("pending"); // 待处理状态
|
||
doc.setChunkTotal(0);
|
||
doc.setCreateBy(userId);
|
||
doc.setCreateTime(LocalDateTime.now());
|
||
doc.setDeleted(0);
|
||
|
||
documentMapper.insert(doc);
|
||
|
||
// 5. 异步处理文档(解析、切片、向量化)
|
||
documentProcessor.processDocumentAsync(doc.getId());
|
||
|
||
log.info("文件上传成功: {}, docId: {}", originalFilename, docId);
|
||
|
||
// 6. 返回VO
|
||
return convertToVO(doc);
|
||
}
|
||
|
||
@Override
|
||
public List<KbDocumentVO> getProjectDocuments(Long projectId) {
|
||
return documentMapper.selectProjectDocuments(projectId);
|
||
}
|
||
|
||
@Override
|
||
public void deleteDocument(String docId, Long userId) {
|
||
// 1. 查询文档
|
||
AiDocument doc = documentMapper.selectByDocId(docId);
|
||
if (doc == null) {
|
||
throw new RuntimeException("文档不存在");
|
||
}
|
||
|
||
// 2. 删除MinIO中的文件
|
||
try {
|
||
minioService.deleteFile(doc.getFilePath());
|
||
} catch (Exception e) {
|
||
log.error("删除MinIO文件失败: {}, 错误: {}", doc.getFilePath(), e.getMessage());
|
||
// 继续删除数据库记录
|
||
}
|
||
|
||
// 3. 删除向量库中的向量(简化处理,实际可能需要更复杂的逻辑)
|
||
documentProcessor.deleteDocumentVectors(docId);
|
||
|
||
// 4. 删除数据库记录
|
||
documentMapper.deleteByDocId(docId);
|
||
|
||
log.info("文档删除成功: {}, userId: {}", docId, userId);
|
||
}
|
||
|
||
@Override
|
||
public void reindexDocument(String docId, Long userId) {
|
||
// 1. 查询文档
|
||
AiDocument doc = documentMapper.selectByDocId(docId);
|
||
if (doc == null) {
|
||
throw new RuntimeException("文档不存在");
|
||
}
|
||
|
||
// 2. 更新状态为处理中
|
||
doc.setStatus("processing");
|
||
documentMapper.updateById(doc);
|
||
|
||
// 3. 删除旧的向量
|
||
documentProcessor.deleteDocumentVectors(docId);
|
||
|
||
// 4. 重新处理
|
||
documentProcessor.processDocumentAsync(doc.getId());
|
||
|
||
log.info("文档重新索引: {}, userId: {}", docId, userId);
|
||
}
|
||
|
||
@Override
|
||
public void processDocument(Long docId) {
|
||
documentProcessor.processDocument(docId);
|
||
}
|
||
|
||
@Override
|
||
@Async
|
||
public void processDocumentAsync(Long docId) {
|
||
documentProcessor.processDocument(docId);
|
||
}
|
||
|
||
/**
|
||
* 验证文件
|
||
*/
|
||
private void validateFile(MultipartFile file) {
|
||
if (file == null || file.isEmpty()) {
|
||
throw new RuntimeException("文件不能为空");
|
||
}
|
||
|
||
String filename = file.getOriginalFilename();
|
||
if (filename == null || filename.isEmpty()) {
|
||
throw new RuntimeException("文件名不能为空");
|
||
}
|
||
|
||
String extension = getFileExtension(filename);
|
||
if (!SUPPORTED_TYPES.contains(extension.toLowerCase())) {
|
||
throw new RuntimeException("不支持的文件类型: " + extension);
|
||
}
|
||
|
||
// 文件大小限制(50MB)
|
||
long maxSize = 50 * 1024 * 1024;
|
||
if (file.getSize() > maxSize) {
|
||
throw new RuntimeException("文件大小超过限制(最大50MB)");
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 获取文件扩展名
|
||
*/
|
||
private String getFileExtension(String filename) {
|
||
if (filename == null || filename.lastIndexOf('.') == -1) {
|
||
return "";
|
||
}
|
||
return filename.substring(filename.lastIndexOf('.') + 1).toLowerCase();
|
||
}
|
||
|
||
/**
|
||
* 检测文档类型
|
||
*/
|
||
private String detectDocType(String extension) {
|
||
return switch (extension.toLowerCase()) {
|
||
case "pdf" -> "report";
|
||
case "doc", "docx" -> "document";
|
||
case "txt", "md" -> "text";
|
||
case "json", "csv" -> "data";
|
||
default -> "other";
|
||
};
|
||
}
|
||
|
||
/**
|
||
* 转换为VO
|
||
*/
|
||
private KbDocumentVO convertToVO(AiDocument doc) {
|
||
KbDocumentVO vo = new KbDocumentVO();
|
||
vo.setId(doc.getId());
|
||
vo.setDocId(doc.getDocId());
|
||
vo.setTitle(doc.getTitle());
|
||
vo.setDocType(doc.getDocType());
|
||
vo.setFileType(doc.getFileType());
|
||
vo.setFileSize(doc.getFileSize());
|
||
vo.setFilePath(doc.getFilePath());
|
||
vo.setSourceType(doc.getSourceType());
|
||
vo.setChunkCount(doc.getChunkTotal());
|
||
vo.setStatus(doc.getStatus());
|
||
vo.setCreateTime(doc.getCreateTime());
|
||
return vo;
|
||
}
|
||
}
|