feat(omr): 新增基于OCR的试卷ID识别功能

- 集成Tess4J实现OCR识别，新增analyzeExamWordsId方法提取试卷ID
- 对试卷图片左上角区域进行裁剪和预处理以提升识别准确率
- 添加Mat到BufferedImage的转换辅助方法,支持OCR读取
- 在测试用例中增加对OCR识别功能的集成测试
- 修改配置文件，添加OCR数据路径；完善依赖，引入OpenCV和Tess4J库
This commit is contained in:
lbw
2025-12-12 17:04:02 +08:00
parent e0258c7ddf
commit e729ddc829
7 changed files with 107 additions and 1 deletions

View File

@@ -6,6 +6,7 @@ import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import nu.pattern.OpenCV;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.test.context.SpringBootTest;
import java.util.Arrays;
@@ -15,6 +16,9 @@ import java.util.List;
@Slf4j
public class TestOmr {
@Value("${templates.data}")
private String tessdataPath;
@Test
public void testOmr(){
OpenCV.loadLocally();
@@ -33,5 +37,11 @@ public class TestOmr {
}
/**
 * Integration check for the OCR-based exam ID extraction.
 *
 * <p>Feeds the sample sheet image to {@code PngUtil.analysisXY} to obtain its
 * coordinate list, then passes the same image, the configured tessdata path and
 * those coordinates to {@code PngUtil.analyzeExamWordsId}, and logs whatever ID
 * comes back. Nothing is asserted; the outcome is only written to the log.
 *
 * <p>NOTE(review): the image location is an absolute, machine-specific Windows
 * path, so this test presumably only runs on the original author's workstation —
 * consider resolving the file from the classpath instead.
 */
@Test
public void testInteger() {
    final String sheetImagePath = "C:\\project\\java\\enlish_edu\\enlish\\enlish-service\\src\\main\\resources\\templates\\p3.png";

    // Coordinates detected on the sheet are needed by the ID extraction step.
    final List<CoordinatesXY> coordinates = PngUtil.analysisXY(sheetImagePath);

    // tessdataPath is the field injected from the "templates.data" property.
    log.info("examWordsId:{}", PngUtil.analyzeExamWordsId(sheetImagePath, tessdataPath, coordinates));
}
}