diff --git a/enlish-service/debug_roi.jpg b/enlish-service/debug_roi.jpg
new file mode 100644
index 0000000..cdd10cf
Binary files /dev/null and b/enlish-service/debug_roi.jpg differ
diff --git a/enlish-service/pom.xml b/enlish-service/pom.xml
index 58e9094..e8573f9 100644
--- a/enlish-service/pom.xml
+++ b/enlish-service/pom.xml
@@ -100,6 +100,11 @@
opencv
+
+ net.sourceforge.tess4j
+ tess4j
+
+
diff --git a/enlish-service/src/main/java/com/yinlihupo/enlish/service/utils/PngUtil.java b/enlish-service/src/main/java/com/yinlihupo/enlish/service/utils/PngUtil.java
index 42a4b69..46cc661 100644
--- a/enlish-service/src/main/java/com/yinlihupo/enlish/service/utils/PngUtil.java
+++ b/enlish-service/src/main/java/com/yinlihupo/enlish/service/utils/PngUtil.java
@@ -4,15 +4,23 @@ import com.yinlihupo.enlish.service.constant.ExamWordsConstant;
import com.yinlihupo.enlish.service.model.bo.CoordinatesXY;
import com.yinlihupo.enlish.service.model.bo.Word;
import lombok.extern.slf4j.Slf4j;
+import net.sourceforge.tess4j.ITesseract;
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.TesseractException;
import nu.pattern.OpenCV;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.opencv.core.*;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
+import org.springframework.beans.factory.annotation.Value;
+import java.awt.image.BufferedImage;
+import java.awt.image.DataBufferByte;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
@Slf4j
public class PngUtil {
@@ -184,6 +192,82 @@ public class PngUtil {
}
}
+ public static Integer analyzeExamWordsId(String imagePath, String tessdataPath, List coordinatesXIES) {
+ // 1. 读取图片
+ Mat src = Imgcodecs.imread(imagePath);
+ if (src.empty()) {
+ System.out.println("无法加载图片");
+ return 0;
+ }
+
+ // 2. 截取左上角区域 (ROI)
+ // 根据图片大概估算:从 (0,0) 开始,宽约 300像素,高约 80像素
+ // 你可以根据实际情况调整这个范围
+ CoordinatesXY left = coordinatesXIES.get(0);
+ Rect roiRect = new Rect(0, 0, left.getX(), left.getY());
+ Mat roi = new Mat(src, roiRect);
+
+ // 3. 图像预处理 (提高 OCR 准确率)
+ // 3.1 转为灰度图
+ Mat gray = new Mat();
+ Imgproc.cvtColor(roi, gray, Imgproc.COLOR_BGR2GRAY);
+
+ // 3.2 二值化 (Thresholding)
+ // 使用 OTSU 算法自动寻找最佳阈值,或者手动指定阈值
+ Mat binary = new Mat();
+ Imgproc.threshold(gray, binary, 0, 255, Imgproc.THRESH_BINARY | Imgproc.THRESH_OTSU);
+
+ // 可选:保存预处理后的图片查看效果
+ Imgcodecs.imwrite("debug_roi.jpg", binary);
+
+ // 4. 将 OpenCV Mat 转换为 BufferedImage (供 Tess4J 使用)
+ BufferedImage processedImage = matToBufferedImage(binary);
+
+ // 5. 使用 Tesseract 进行 OCR 识别
+ ITesseract instance = new Tesseract();
+ instance.setDatapath(tessdataPath);
+ instance.setLanguage("eng");
+
+ try {
+ String result = instance.doOCR(processedImage);
+ System.out.println("OCR 识别原始内容: \n" + result);
+
+ // 6. 使用正则表达式提取 ID
+ // 匹配 "Assessment_id" 后面的数字
+ Pattern pattern = Pattern.compile("id[:\\s_]+(\\d+)");
+ Matcher matcher = pattern.matcher(result);
+
+ if (matcher.find()) {
+ String id = matcher.group(1);
+ System.out.println("-------------------------");
+ System.out.println("成功提取 ID: " + id);
+ System.out.println("-------------------------");
+ return Integer.parseInt(id);
+ } else {
+ System.out.println("未找到匹配的 ID 格式");
+ }
+
+ } catch (TesseractException e) {
+ System.err.println("OCR 识别出错: " + e.getMessage());
+ }
+ return 0;
+ }
+
+    /**
+     * Copies the pixels of an OpenCV matrix into a new {@link BufferedImage}.
+     *
+     * <p>A single-channel matrix becomes a {@code TYPE_BYTE_GRAY} image and a three-channel
+     * matrix becomes a {@code TYPE_3BYTE_BGR} image. Other channel counts are rejected:
+     * their byte layout does not match either image type, so copying them would overflow
+     * the image buffer or produce a scrambled picture.
+     *
+     * <p>NOTE(review): the pixel copy uses {@code Mat.get(int, int, byte[])}, which is
+     * expected to accept only 8-bit matrices - confirm against the OpenCV version in use.
+     *
+     * @param m source matrix with 1 or 3 channels
+     * @return a new image with the same width and height as {@code m}
+     * @throws IllegalArgumentException if {@code m} is empty or has an unsupported channel count
+     */
+    public static BufferedImage matToBufferedImage(Mat m) {
+        if (m.empty()) {
+            throw new IllegalArgumentException("Cannot convert an empty Mat to a BufferedImage");
+        }
+
+        final int channels = m.channels();
+        final int type;
+        if (channels == 1) {
+            type = BufferedImage.TYPE_BYTE_GRAY;
+        } else if (channels == 3) {
+            type = BufferedImage.TYPE_3BYTE_BGR;
+        } else {
+            throw new IllegalArgumentException("Unsupported channel count: " + channels + " (expected 1 or 3)");
+        }
+
+        BufferedImage image = new BufferedImage(m.cols(), m.rows(), type);
+        // The raster's backing array has exactly cols * rows * channels bytes for these two
+        // image types, so the matrix can be read straight into it without a temporary buffer.
+        final byte[] targetPixels = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
+        m.get(0, 0, targetPixels);
+        return image;
+    }
+
private static @NonNull List getCoordinatesXIES(List list, int height) {
List ans = new ArrayList<>();
CoordinatesXY left = new CoordinatesXY();
diff --git a/enlish-service/src/main/resources/config/application-dev.yml b/enlish-service/src/main/resources/config/application-dev.yml
index b8b351a..a3493dd 100644
--- a/enlish-service/src/main/resources/config/application-dev.yml
+++ b/enlish-service/src/main/resources/config/application-dev.yml
@@ -24,5 +24,6 @@ spring:
templates:
word: C:\project\java\enlish_edu\enlish\enlish-service\src\main\resources\templates\assessment_v5.docx
count: 100
+ data: C:\project\tess
tmp:
png: C:\project\java\enlish_edu\enlish\enlish-service\src\main\resources\tmp\png
\ No newline at end of file
diff --git a/enlish-service/src/main/resources/templates/p3.png b/enlish-service/src/main/resources/templates/p3.png
index a30248d..c335031 100644
Binary files a/enlish-service/src/main/resources/templates/p3.png and b/enlish-service/src/main/resources/templates/p3.png differ
diff --git a/enlish-service/src/test/java/com/yinlihupo/enlish/service/omr/TestOmr.java b/enlish-service/src/test/java/com/yinlihupo/enlish/service/omr/TestOmr.java
index f075525..ab4364f 100644
--- a/enlish-service/src/test/java/com/yinlihupo/enlish/service/omr/TestOmr.java
+++ b/enlish-service/src/test/java/com/yinlihupo/enlish/service/omr/TestOmr.java
@@ -6,6 +6,7 @@ import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import nu.pattern.OpenCV;
import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.test.context.SpringBootTest;
import java.util.Arrays;
@@ -15,6 +16,9 @@ import java.util.List;
@Slf4j
public class TestOmr {
+ @Value("${templates.data}")
+ private String tessdataPath; // injected from the templates.data property; handed to PngUtil.analyzeExamWordsId as its tessdataPath argument
+
@Test
public void testOmr(){
OpenCV.loadLocally();
@@ -33,5 +37,11 @@ public class TestOmr {
}
-
+    /**
+     * Runs the marker detection and the id recognition on the p3.png template and logs
+     * the recognized id. The result is only logged, not asserted.
+     */
+    @Test
+    public void testInteger(){
+        final String templateImage = "C:\\project\\java\\enlish_edu\\enlish\\enlish-service\\src\\main\\resources\\templates\\p3.png";
+        // The marker coordinates are computed first and fed straight into the id recognition.
+        Integer recognizedId = PngUtil.analyzeExamWordsId(templateImage, tessdataPath, PngUtil.analysisXY(templateImage));
+        log.info("examWordsId:{}", recognizedId);
+    }
}
diff --git a/pom.xml b/pom.xml
index 0318a8f..07c8ca6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -116,6 +116,12 @@
4.7.0-0
+
+ net.sourceforge.tess4j
+ tess4j
+ 5.4.0
+
+