diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index aaa65fc8..a9754aa6 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -12,4 +12,4 @@ chown ${USER_UID}:${USER_GID} /usr/local/clouddisk-${VERSION}.jar chown -R ${USER_UID}:${USER_GID} /usr/local/clouddisk-lib chown -R ${USER_UID}:${USER_GID} log -exec gosu ${USER_UID}:${USER_GID} java -Dfile.encoding=UTF-8 -Dloader.path=/usr/local/clouddisk-lib -jar ${JVM_OPTS} /usr/local/clouddisk-${VERSION}.jar --spring.profiles.active=${RUN_ENVIRONMENT} --spring.data.mongodb.uri=${MONGODB_URI} --tess4j.data-path=${TESS4J_DATA_PATH} --file.monitor=${FILE_MONITOR} --file.rootDir=${FILE_ROOT_DIR} --file.monitorIgnoreFilePrefix=${MONITOR_IGNORE_FILE_PREFIX} --logging.level.root=${LOG_LEVEL} --file.ip2region-db-path=/jmalcloud/ip2region.xdb +exec gosu ${USER_UID}:${USER_GID} java -Dfile.encoding=UTF-8 -Dloader.path=/usr/local/clouddisk-lib -jar ${JVM_OPTS} /usr/local/clouddisk-${VERSION}.jar --spring.profiles.active=${RUN_ENVIRONMENT} --spring.data.mongodb.uri=${MONGODB_URI} --tess4j.data-path=${TESS4J_DATA_PATH} --file.monitor=${FILE_MONITOR} --file.rootDir=${FILE_ROOT_DIR} --file.monitorIgnoreFilePrefix=${MONITOR_IGNORE_FILE_PREFIX} --logging.level.root=${LOG_LEVEL} --file.ip2region-db-path=/jmalcloud/ip2region.xdb --file.ocr-lite-onnx-model-path=/jmalcloud/models diff --git a/docker/Dockerfile b/docker/Dockerfile index 3b130482..3cc2f548 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,13 +20,22 @@ RUN apt-get update && \ chmod -R 777 /usr/local/mxcad/mxcadassembly && \ chmod -R 777 /usr/local/mxcad/mx/so/* && \ cp -r -f /usr/local/mxcad/mx/locale /usr/local/share/locale && \ + # 安装 OcrLiteOnnx + wget https://github.com/jamebal/OcrLiteOnnx/releases/download/v.1.8.2/ubuntu-22.04-${ARCH}-bin.7z && \ + wget https://github.com/jamebal/OcrLiteOnnx/releases/download/v.1.8.2/models.7z && \ + 7z x models.7z -o/jmalcloud/ && \ + 7z x ubuntu-22.04-${ARCH}-bin.7z -o/usr/local/ && \ + mv /usr/local/ubuntu-22.04-${ARCH}-bin/Linux-BIN/OcrLiteOnnx /usr/local/bin/OcrLiteOnnx && \ # 卸载 wget 并清理下载的文件和APT缓存 apt-get remove -y wget unzip && \ apt-get clean && \ rm -f jellyfin-ffmpeg5_5.1.4-3-jammy_${ARCH}.deb && \ rm -rf /var/lib/apt/lists/* && \ rm -f /jmalcloud/mxcad_x86_64.zip && \ - rm -rf /usr/local/__MACOSX/ + rm -rf /usr/local/ubuntu-22.04-${ARCH}-bin && \ + rm -f ubuntu-22.04-${ARCH}-bin.7z && \ + rm -f models.7z && \ + rm -rf /usr/local/__MACOSX # 将/usr/lib/jellyfin-ffmpeg添加到PATH ENV PATH=/usr/lib/jellyfin-ffmpeg:$PATH diff --git a/src/main/java/com/jmal/clouddisk/config/FileProperties.java b/src/main/java/com/jmal/clouddisk/config/FileProperties.java index 593eb0e5..fcf5477b 100644 --- a/src/main/java/com/jmal/clouddisk/config/FileProperties.java +++ b/src/main/java/com/jmal/clouddisk/config/FileProperties.java @@ -95,6 +95,11 @@ public class FileProperties { */ private String ip2regionDbPath; + /** + * ocr-lite-onnx模型路径 + */ + private String ocrLiteONNXModelPath; + public void setIp2regionDbPath(String path) { Path dbPath = Paths.get(path); if (!PathUtil.exists(dbPath, true)) { diff --git a/src/main/java/com/jmal/clouddisk/controller/rest/CloudSettingController.java b/src/main/java/com/jmal/clouddisk/controller/rest/CloudSettingController.java index bdd7f0c2..6a325fba 100644 --- a/src/main/java/com/jmal/clouddisk/controller/rest/CloudSettingController.java +++ b/src/main/java/com/jmal/clouddisk/controller/rest/CloudSettingController.java @@ -8,6 +8,8 @@ import com.jmal.clouddisk.model.LdapConfigDTO; import com.jmal.clouddisk.model.LogOperation; import com.jmal.clouddisk.model.WebsiteSettingDTO; +import com.jmal.clouddisk.ocr.OcrConfig; +import com.jmal.clouddisk.ocr.OcrService; import com.jmal.clouddisk.service.IAuthService; import com.jmal.clouddisk.service.IUserService; import com.jmal.clouddisk.service.impl.SettingService; @@ -42,6 +44,8 @@ public class CloudSettingController { private final VideoProcessService videoProcessService; + private final OcrService ocrService; + @Operation(summary = "重建索引-用户") @GetMapping("/user/setting/sync") @Permission(value = "cloud:file:upload") @@ -65,13 +69,27 @@ public ResponseResult getTranscodeConfig() { return ResultUtil.success(videoProcessService.getTranscodeConfig()); } - @Operation(summary = "设置视频转码配置") + @Operation(summary = "修改视频转码配置") @PutMapping("/cloud/setting/transcode/config") @Permission(value = "cloud:set:sync") - public ResponseResult getTranscodeConfig(@RequestBody @Validated TranscodeConfig transcodeConfig) { + public ResponseResult setTranscodeConfig(@RequestBody @Validated TranscodeConfig transcodeConfig) { return ResultUtil.success(videoProcessService.setTranscodeConfig(transcodeConfig)); } + @Operation(summary = "获取ocr配置") + @GetMapping("/cloud/setting/ocr/config") + @Permission(value = "cloud:set:sync") + public ResponseResult getOcrConfig() { + return ResultUtil.success(ocrService.getOcrConfig()); + } + + @Operation(summary = "修改ocr配置") + @PutMapping("/cloud/setting/ocr/config") + @Permission(value = "cloud:set:sync") + public ResponseResult setOcrConfig(@RequestBody @Validated OcrConfig ocrConfig) { + return ResultUtil.success(ocrService.setOcrConfig(ocrConfig)); + } + @Operation(summary = "取消转码任务") @PutMapping("/cloud/setting/transcode/cancel-task") @Permission(value = "cloud:set:sync") diff --git a/src/main/java/com/jmal/clouddisk/controller/rest/OcrController.java b/src/main/java/com/jmal/clouddisk/controller/rest/OcrController.java index 8b220303..21a155d5 100644 --- a/src/main/java/com/jmal/clouddisk/controller/rest/OcrController.java +++ b/src/main/java/com/jmal/clouddisk/controller/rest/OcrController.java @@ -28,7 +28,7 @@ public String performOcr(@RequestParam String fileUrl) { HttpUtil.downloadFile(fileUrl, tempImagePath); TimeInterval timeInterval = new TimeInterval(); timeInterval.start(); - String str = ocrService.doOCR(tempImagePath, null); + String str = ocrService.doOCR(tempImagePath, null, "tesseract"); log.info("OCR time consuming: {}", timeInterval.intervalMs()); return str; } finally { diff --git a/src/main/java/com/jmal/clouddisk/interceptor/AuthInterceptor.java b/src/main/java/com/jmal/clouddisk/interceptor/AuthInterceptor.java index 5c83b409..03b4726f 100644 --- a/src/main/java/com/jmal/clouddisk/interceptor/AuthInterceptor.java +++ b/src/main/java/com/jmal/clouddisk/interceptor/AuthInterceptor.java @@ -1,7 +1,6 @@ package com.jmal.clouddisk.interceptor; import cn.hutool.core.text.CharSequenceUtil; -import cn.hutool.core.thread.ThreadUtil; import com.alibaba.fastjson.JSON; import com.jmal.clouddisk.exception.ExceptionType; import com.jmal.clouddisk.model.UserAccessTokenDO; @@ -13,6 +12,8 @@ import com.jmal.clouddisk.util.ResponseResult; import com.jmal.clouddisk.util.ResultUtil; import com.jmal.clouddisk.util.TokenUtil; +import io.reactivex.rxjava3.core.Completable; +import io.reactivex.rxjava3.schedulers.Schedulers; import jakarta.servlet.ServletOutputStream; import jakarta.servlet.http.Cookie; import jakarta.servlet.http.HttpServletRequest; @@ -150,7 +151,9 @@ public String getUserNameByAccessToken(HttpServletRequest request) { return null; } // access-token 认证通过 设置该身份的权限 - ThreadUtil.execute(() -> authDAO.updateAccessToken(username)); + Completable.fromAction(() -> authDAO.updateAccessToken(username, userAccessTokenDO.getAccessToken())) + .subscribeOn(Schedulers.io()) + .subscribe(); setAuthorities(username); return userAccessTokenDO.getUsername(); } diff --git a/src/main/java/com/jmal/clouddisk/lucene/LuceneService.java b/src/main/java/com/jmal/clouddisk/lucene/LuceneService.java index 1c2e12c5..844fe2b6 100644 --- a/src/main/java/com/jmal/clouddisk/lucene/LuceneService.java +++ b/src/main/java/com/jmal/clouddisk/lucene/LuceneService.java @@ -122,16 +122,14 @@ public void init() { int processors = Runtime.getRuntime().availableProcessors() - 2; // 获取jvm可用内存 long maxMemory = Runtime.getRuntime().maxMemory(); - // 设置线程数, 假设每个线程占用内存为50M - int maxProcessors = (int) (maxMemory / 50 / 1024 / 1024); + // 设置线程数, 假设每个线程占用内存为100M + int maxProcessors = (int) (maxMemory / 300 / 1024 / 1024); if (processors > maxProcessors) { processors = maxProcessors; } - if (processors < 1) { - processors = 1; - } + processors = Math.max(processors, 1); log.info("updateContentIndexTask 线程数: {}, maxProcessors: {}", processors, maxProcessors); - executorUpdateContentIndexService = ThreadUtil.newFixedExecutor(processors, 1, "updateContentIndexTask", true); + executorUpdateContentIndexService = ThreadUtil.newFixedExecutor(processors, 20, "updateContentIndexTask", true); } if (executorUpdateBigContentIndexService == null) { executorUpdateBigContentIndexService = ThreadUtil.newFixedExecutor(2, 100, "updateBigContentIndexTask", true); diff --git a/src/main/java/com/jmal/clouddisk/lucene/ReadContentService.java b/src/main/java/com/jmal/clouddisk/lucene/ReadContentService.java index 39c26452..3da54c2c 100644 --- a/src/main/java/com/jmal/clouddisk/lucene/ReadContentService.java +++ b/src/main/java/com/jmal/clouddisk/lucene/ReadContentService.java @@ -1,6 +1,5 @@ package com.jmal.clouddisk.lucene; -import cn.hutool.core.io.FileUtil; import cn.hutool.core.util.StrUtil; import com.jmal.clouddisk.media.VideoProcessService; import com.jmal.clouddisk.ocr.OcrService; @@ -43,8 +42,6 @@ import org.jsoup.nodes.Document; import org.springframework.stereotype.Service; -import javax.imageio.ImageIO; -import java.awt.image.BufferedImage; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -62,8 +59,6 @@ public class ReadContentService { public final CommonFileService commonFileService; - public final TaskProgressService taskProgressService; - public final VideoProcessService videoProcessService; /** @@ -123,36 +118,18 @@ public String readPdfContent(File file, String fileId) { } // 如果页面包含图片或没有文字,则进行 OCR if (checkPageContent(document, pageIndex) || text.isEmpty()) { - taskProgressService.addTaskProgress(file, TaskType.OCR, pageNumber + "/" + document.getNumberOfPages()); - content.append(extractPageWithOCR(pdfRenderer, pageIndex, username)); + if (ocrService.getOcrConfig().getEnable()) { + content.append(ocrService.extractPageWithOCR(file, pdfRenderer, pageIndex, document.getNumberOfPages(), username)); + } } } return content.toString(); } catch (IOException e) { FileContentUtil.readFailed(file, e); - } finally { - taskProgressService.removeTaskProgress(file); } return null; } - private String extractPageWithOCR(PDFRenderer pdfRenderer, int pageIndex, String username) { - try { - BufferedImage pageImage = pdfRenderer.renderImageWithDPI(pageIndex, 300); - String tempImageFile = ocrService.generateOrcTempImagePath(username); - ImageIO.write(pageImage, "png", new File(tempImageFile)); - try { - // 使用 OCR 识别页面内容 - return ocrService.doOCR(tempImageFile, ocrService.generateOrcTempImagePath(username)); - } finally { - FileUtil.del(tempImageFile); - } - } catch (Exception e) { - log.error("Error processing page {}", pageIndex + 1, e); - return ""; - } - } - public String readEpubContent(File file, String fileId) { try (InputStream fileInputStream = new FileInputStream(file)) { // 打开 EPUB 文件 diff --git a/src/main/java/com/jmal/clouddisk/lucene/RebuildIndexTaskService.java b/src/main/java/com/jmal/clouddisk/lucene/RebuildIndexTaskService.java index ebd3dd1c..75b9f74c 100644 --- a/src/main/java/com/jmal/clouddisk/lucene/RebuildIndexTaskService.java +++ b/src/main/java/com/jmal/clouddisk/lucene/RebuildIndexTaskService.java @@ -7,6 +7,7 @@ import cn.hutool.core.util.StrUtil; import com.jmal.clouddisk.config.FileProperties; import com.jmal.clouddisk.model.FileDocument; +import com.jmal.clouddisk.ocr.OcrService; import com.jmal.clouddisk.service.impl.CommonFileService; import com.jmal.clouddisk.service.impl.MenuService; import com.jmal.clouddisk.service.impl.RoleService; @@ -73,6 +74,8 @@ public class RebuildIndexTaskService { private double totalCount; + private final OcrService ocrService; + /** * 接收消息的用户 */ @@ -474,6 +477,9 @@ private void setPercentMap(Double syncPercent, Double indexingPercent) { } PERCENT_MAP.put(SYNC_PERCENT, syncPercent); PERCENT_MAP.put(INDEXING_PERCENT, indexingPercent); + if (syncPercent >= 100 && indexingPercent >= 100) { + ocrService.setMaxConcurrentRequests(ocrService.getOcrConfig().getMaxTasks()); + } } private void pushMessage() { diff --git a/src/main/java/com/jmal/clouddisk/lucene/TaskProgress.java b/src/main/java/com/jmal/clouddisk/lucene/TaskProgress.java index 20ca1e16..6e16b903 100644 --- a/src/main/java/com/jmal/clouddisk/lucene/TaskProgress.java +++ b/src/main/java/com/jmal/clouddisk/lucene/TaskProgress.java @@ -20,6 +20,10 @@ public class TaskProgress { * 任务名称 */ private String name; + /** + * 文件路径 + */ + private String path; /** * 任务状态 */ diff --git a/src/main/java/com/jmal/clouddisk/lucene/TaskProgressService.java b/src/main/java/com/jmal/clouddisk/lucene/TaskProgressService.java index 0bcd7244..14e11d15 100644 --- a/src/main/java/com/jmal/clouddisk/lucene/TaskProgressService.java +++ b/src/main/java/com/jmal/clouddisk/lucene/TaskProgressService.java @@ -60,6 +60,7 @@ public void addTaskProgress(File file, TaskType taskType, String progress) { return; } taskProgress = new TaskProgress(taskId, username, taskType, file.getName(), progress); + taskProgress.setPath(commonFileService.getRelativePath(username, file.getAbsolutePath(), file.getName())); } addTaskProgress(taskProgress); } diff --git a/src/main/java/com/jmal/clouddisk/ocr/CommonOcrService.java b/src/main/java/com/jmal/clouddisk/ocr/CommonOcrService.java new file mode 100644 index 00000000..b120832f --- /dev/null +++ b/src/main/java/com/jmal/clouddisk/ocr/CommonOcrService.java @@ -0,0 +1,79 @@ +package com.jmal.clouddisk.ocr; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.lang.ObjectId; +import cn.hutool.core.util.StrUtil; +import com.jmal.clouddisk.config.FileProperties; +import com.jmal.clouddisk.media.FFMPEGCommand; +import com.jmal.clouddisk.service.Constants; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static com.jmal.clouddisk.util.FFMPEGUtils.getWaitingForResults; + +@Service +@Slf4j +@RequiredArgsConstructor +public class CommonOcrService { + + private final FileProperties fileProperties; + + /** + * 生成一个临时的图片路径 + */ + public String generateOrcTempImagePath(String username) { + Path tempPath; + if (StrUtil.isBlank(username)) { + tempPath = Paths.get(fileProperties.getRootDir(), fileProperties.getChunkFileDir()); + } else { + tempPath = Paths.get(fileProperties.getRootDir(), fileProperties.getChunkFileDir(), username); + } + if (!FileUtil.exist(tempPath.toString())) { + FileUtil.mkdir(tempPath.toString()); + } + return Paths.get(tempPath.toString(), ObjectId.next(true) + "_temp_ocr.png").toString(); + } + + /** + * 获取OCR识别之前预处理的图片, 使其更易识别 + * 使用ffmpeg调整灰度和对比度 + * @param inputPath 原始图片路径 + * @return 预处理后的图片路径 + */ + public static String getPreprocessedOCRImage(String inputPath, String outputPath) { + if (FFMPEGCommand.hasNoFFmpeg()) { + return outputPath; + } + if (FileUtil.exist(outputPath)) { + return outputPath; + } + try { + ProcessBuilder processBuilder = getPreOCRImageProcessBuilder(inputPath, outputPath); + Process process = processBuilder.start(); + return getWaitingForResults(outputPath, processBuilder, process); + } catch (InterruptedException e) { + log.error(e.getMessage(), e); + return null; + } catch (IOException e) { + log.error(e.getMessage(), e); + } + return null; + } + + private static ProcessBuilder getPreOCRImageProcessBuilder(String inputPath, String outputPath) { + ProcessBuilder processBuilder = new ProcessBuilder( + Constants.FFMPEG, + "-i", inputPath, + "-vf", "format=gray,eq=contrast=1.5:brightness=0.1", + outputPath + ); + processBuilder.redirectErrorStream(true); + return processBuilder; + } + +} diff --git a/src/main/java/com/jmal/clouddisk/ocr/IOcrService.java b/src/main/java/com/jmal/clouddisk/ocr/IOcrService.java new file mode 100644 index 00000000..1313f604 --- /dev/null +++ b/src/main/java/com/jmal/clouddisk/ocr/IOcrService.java @@ -0,0 +1,13 @@ +package com.jmal.clouddisk.ocr; + +public interface IOcrService { + + /** + * 执行OCR识别 + * @param imagePath 图片路径 + * @param tempImagePath 临时图片路径, 用于存放预处理后的图片 + * @return 识别结果 + */ + String doOCR(String imagePath, String tempImagePath); + +} diff --git a/src/main/java/com/jmal/clouddisk/ocr/OcrConfig.java b/src/main/java/com/jmal/clouddisk/ocr/OcrConfig.java new file mode 100644 index 00000000..998805d3 --- /dev/null +++ b/src/main/java/com/jmal/clouddisk/ocr/OcrConfig.java @@ -0,0 +1,55 @@ +package com.jmal.clouddisk.ocr; + +import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.validation.Valid; +import jakarta.validation.constraints.Max; +import jakarta.validation.constraints.Min; +import lombok.Data; +import org.springframework.data.mongodb.core.mapping.Document; + +/** + * OCR配置 + */ +@Data +@Document(collection = "ocrConfig") +@Valid +@Schema +public class OcrConfig { + + @Schema(description = "是否启用orc, 默认开启") + private Boolean enable; + + @Max(value = 8, message = "最大任务数不能超过8") + @Min(value = 1, message = "最大任务数不能小于1") + @Schema(description = "最大任务数, 最多同时处理的ocr任务数, 默认为1") + private Integer maxTasks; + + @Schema(description = "ocr引擎, 默认tesseract") + private String ocrEngine; + + public Boolean getEnable() { + if (enable == null) + return true; + return enable; + } + + public String getOcrEngine() { + if (ocrEngine == null) + return OcrEngine.TESSERACT.getOcrEngineName(); + return ocrEngine; + } + + public void setOcrEngine(String ocrEngine) { + if (ocrEngine == null) + return; + if (!OcrEngine.TESSERACT.getOcrEngineName().equals(ocrEngine) && !OcrEngine.OCR_LITE_ONNX.getOcrEngineName().equals(ocrEngine)) + return; + this.ocrEngine = ocrEngine; + } + + public Integer getMaxTasks() { + if (maxTasks == null) + return 1; + return maxTasks; + } +} diff --git a/src/main/java/com/jmal/clouddisk/ocr/OcrEngine.java b/src/main/java/com/jmal/clouddisk/ocr/OcrEngine.java new file mode 100644 index 00000000..a82ed6ce --- /dev/null +++ b/src/main/java/com/jmal/clouddisk/ocr/OcrEngine.java @@ -0,0 +1,15 @@ +package com.jmal.clouddisk.ocr; + +import lombok.Getter; + +@Getter +public enum OcrEngine { + OCR_LITE_ONNX("ocrLiteOnnx"), + TESSERACT("tesseract"); + + private final String ocrEngineName; + + OcrEngine(String ocrEngineName) { + this.ocrEngineName = ocrEngineName; + } +} diff --git a/src/main/java/com/jmal/clouddisk/ocr/OcrLiteOnnxService.java b/src/main/java/com/jmal/clouddisk/ocr/OcrLiteOnnxService.java new file mode 100644 index 00000000..64847fb5 --- /dev/null +++ b/src/main/java/com/jmal/clouddisk/ocr/OcrLiteOnnxService.java @@ -0,0 +1,99 @@ +package com.jmal.clouddisk.ocr; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.util.StrUtil; +import com.jmal.clouddisk.config.FileProperties; +import com.jmal.clouddisk.media.FFMPEGCommand; +import com.jmal.clouddisk.service.Constants; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.io.IOException; + +import static com.jmal.clouddisk.util.FFMPEGUtils.getWaitingForResults; + + +@Service("ocrLiteOnnx") +@RequiredArgsConstructor +@Slf4j +public class OcrLiteOnnxService implements IOcrService { + + public final FileProperties fileProperties; + + @Override + public String doOCR(String imagePath, String tempImagePath) { + if (StrUtil.isBlank(imagePath)) { + return ""; + } + String resultTxtPath = null; + try { + resultTxtPath = getResultText(imagePath, imagePath + "-result.txt"); + if (!FileUtil.isFile(resultTxtPath)) { + return ""; + } + return FileUtil.readUtf8String(resultTxtPath); + } catch (Exception e) { + log.warn("Error while performing OCR: {}", e.getMessage(), e); + } finally { + FileUtil.del(tempImagePath); + if (FileUtil.isFile(resultTxtPath)) { + FileUtil.del(resultTxtPath); + } + } + return ""; + } + + /** + * 获取OCR识别结果 + * @param inputPath 输入图片路径 + * @param outputPath 输出文件路径 + * @return String + */ + public String getResultText(String inputPath, String outputPath) { + if (FFMPEGCommand.hasNoFFmpeg()) { + return outputPath; + } + if (FileUtil.exist(outputPath)) { + return outputPath; + } + try { + ProcessBuilder processBuilder = getOcrLiteOnnxProcessBuilder(inputPath); + Process process = processBuilder.start(); + return getWaitingForResults(outputPath, processBuilder, process, 60); + } catch (InterruptedException e) { + log.error(e.getMessage(), e); + return null; + } catch (IOException e) { + log.error(e.getMessage(), e); + } + return null; + } + + /** + * 使用ocr_lite_onnx进行OCR识别 + * @param inputPath 输入图片路径 + * @return ProcessBuilder + */ + private ProcessBuilder getOcrLiteOnnxProcessBuilder(String inputPath) { + ProcessBuilder processBuilder = new ProcessBuilder( + Constants.OCR_LITE_ONNX, + "--models", fileProperties.getOcrLiteONNXModelPath(), + "--det", "dbnet.onnx", + "--rec", "crnn_lite_lstm.onnx", + "--keys", "keys.txt", + "--image", inputPath, + "--numThread", "1", + "--padding", "40", + "--maxSideLen", "1024", + "--boxScoreThresh", "0.6", + "--boxThresh", "0.3", + "--unClipRatio", "2.0", + "--doAngle", "0", + "--mostAngle", "0", + "--outputResultImg", "0" + ); + processBuilder.redirectErrorStream(true); + return processBuilder; + } +} diff --git a/src/main/java/com/jmal/clouddisk/ocr/OcrService.java b/src/main/java/com/jmal/clouddisk/ocr/OcrService.java index e8e33e40..63a22683 100644 --- a/src/main/java/com/jmal/clouddisk/ocr/OcrService.java +++ b/src/main/java/com/jmal/clouddisk/ocr/OcrService.java @@ -1,109 +1,156 @@ package com.jmal.clouddisk.ocr; import cn.hutool.core.io.FileUtil; -import cn.hutool.core.lang.ObjectId; import cn.hutool.core.util.StrUtil; -import com.jmal.clouddisk.config.FileProperties; -import com.jmal.clouddisk.service.Constants; -import com.jmal.clouddisk.media.FFMPEGCommand; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.jmal.clouddisk.lucene.TaskProgressService; +import com.jmal.clouddisk.lucene.TaskType; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import net.sourceforge.tess4j.ITesseract; -import net.sourceforge.tess4j.Tesseract; -import net.sourceforge.tess4j.TesseractException; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Query; +import org.springframework.data.mongodb.core.query.Update; import org.springframework.stereotype.Service; +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; import java.io.File; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; - -import static com.jmal.clouddisk.util.FFMPEGUtils.getWaitingForResults; - +import java.util.Map; +import java.util.concurrent.Semaphore; @Service @RequiredArgsConstructor @Slf4j public class OcrService { - private final ThreadLocal tesseractThreadLocal; + private final Map ocrServiceMap; + + private final CommonOcrService commonOcrService; + + private final TaskProgressService taskProgressService; + + private final MongoTemplate mongoTemplate; - private final FileProperties fileProperties; + // 初始设置为1个并发请求 + private final Semaphore semaphore = new Semaphore(1); - public String doOCR(String imagePath, String tempImagePath) { + private final Cache ocrConfigCache = Caffeine.newBuilder().build(); + + /** + * 提取PDF页面并使用OCR识别 + * @param pdfRenderer PDFRenderer + * @param pageIndex 页码 + * @param username 用户名 + * @return 识别结果 + */ + public String extractPageWithOCR(File file, PDFRenderer pdfRenderer, int pageIndex, int totalPages, String username) { try { - if (StrUtil.isBlank(imagePath)) { - return ""; - } - if (StrUtil.isBlank(tempImagePath)) { - tempImagePath = generateOrcTempImagePath(null); - } - // 预处理后的图片 - String preprocessedOCRImage = getPreprocessedOCRImage(imagePath, tempImagePath); - if (StrUtil.isBlank(preprocessedOCRImage)) { - return ""; + + taskProgressService.addTaskProgress(file, TaskType.OCR, pageIndex + 1 + "/" + totalPages + " - 等待识别"); + + // 获取许可,如果没有可用许可则会阻塞 + semaphore.acquire(); + + taskProgressService.addTaskProgress(file, TaskType.OCR, pageIndex + 1 + "/" + totalPages + " - 识别中..."); + + BufferedImage pageImage = pdfRenderer.renderImageWithDPI(pageIndex, 300); + String tempImageFile = generateOrcTempImagePath(username); + ImageIO.write(pageImage, "png", new File(tempImageFile)); + try { + // 使用 OCR 识别页面内容 + return doOCR(tempImageFile, generateOrcTempImagePath(username), null); + } finally { + FileUtil.del(tempImageFile); + taskProgressService.removeTaskProgress(file); + // 释放许可 + semaphore.release(); } - File imageFile = new File(preprocessedOCRImage); - ITesseract tesseract = tesseractThreadLocal.get(); - return tesseract.doOCR(imageFile); - } catch (TesseractException e) { - log.warn("Error while performing OCR: {}", e.getMessage(), e); - } finally { - FileUtil.del(tempImagePath); + } catch (Exception e) { + log.error("Error processing page {}", pageIndex + 1, e); + return ""; } - return ""; } - /** - * 生成一个临时的图片路径 - */ - public String generateOrcTempImagePath(String username) { - Path tempPath; - if (StrUtil.isBlank(username)) { - tempPath = Paths.get(fileProperties.getRootDir(), fileProperties.getChunkFileDir()); - } else { - tempPath = Paths.get(fileProperties.getRootDir(), fileProperties.getChunkFileDir(), username); + public String doOCR(String imagePath, String tempImagePath, String ocrEngine) { + OcrConfig config = getOcrConfig(); + if (!config.getEnable()) { + return ""; } - if (!FileUtil.exist(tempPath.toString())) { - FileUtil.mkdir(tempPath.toString()); + if (StrUtil.isBlank(ocrEngine)) { + ocrEngine = config.getOcrEngine(); } - return Paths.get(tempPath.toString(), ObjectId.next(true) + "_temp_ocr.png").toString(); + IOcrService ocrService = ocrServiceMap.get(ocrEngine); + if (ocrService == null) { + throw new IllegalArgumentException("Unknown OCR engine: " + ocrEngine); + } + return ocrService.doOCR(imagePath, tempImagePath); } /** - * 获取OCR识别之前预处理的图片, 使其更易识别 - * 使用ffmpeg调整灰度和对比度 - * @param inputPath 原始图片路径 - * @return 预处理后的图片路径 + * 动态调整并发数量 */ - public String getPreprocessedOCRImage(String inputPath, String outputPath) { - if (FFMPEGCommand.hasNoFFmpeg()) { - return outputPath; + public void setMaxConcurrentRequests(int maxConcurrentRequests) { + int currentPermits = semaphore.availablePermits(); + if (maxConcurrentRequests > currentPermits) { + semaphore.release(maxConcurrentRequests - currentPermits); + } else if (maxConcurrentRequests < currentPermits) { + // 清空所有许可并重新设置 + semaphore.drainPermits(); + semaphore.release(maxConcurrentRequests); } - if (FileUtil.exist(outputPath)) { - return outputPath; + } + + public OcrConfig getOcrConfig() { + return ocrConfigCache.get("ocrConfig", key -> { + OcrConfig config = mongoTemplate.findOne(new Query(), OcrConfig.class); + if (config != null) { + setMaxConcurrentRequests(config.getMaxTasks()); + return config; + } + return new OcrConfig(); + }); + } + + private static Update getOcrConfigUpdate(OcrConfig config) { + Update update = new Update(); + update.set("enable", config.getEnable()); + update.set("maxTasks", config.getMaxTasks()); + update.set("ocrEngine", config.getOcrEngine()); + return update; + } + + /** + * 设置Ocr配置 + * + * @param config OcrConfig + */ + public long setOcrConfig(OcrConfig config) { + if (config == null) { + return 0; } - try { - ProcessBuilder processBuilder = getPreOCRImageProcessBuilder(inputPath, outputPath); - Process process = processBuilder.start(); - return getWaitingForResults(outputPath, processBuilder, process); - } catch (InterruptedException e) { - log.error(e.getMessage(), e); - return null; - } catch (IOException e) { - log.error(e.getMessage(), e); + Query query = new Query(); + OcrConfig ocrConfig = mongoTemplate.findOne(query, OcrConfig.class); + if (ocrConfig == null) { + mongoTemplate.save(config); + } else { + Update update = getOcrConfigUpdate(config); + mongoTemplate.updateFirst(query, update, OcrConfig.class); + if (!semaphore.hasQueuedThreads() && semaphore.availablePermits() == ocrConfig.getMaxTasks()) { + semaphore.drainPermits(); + semaphore.release(config.getMaxTasks()); + } } - return null; + ocrConfigCache.put("ocrConfig", config); + return 0; } - private static ProcessBuilder getPreOCRImageProcessBuilder(String inputPath, String outputPath) { - ProcessBuilder processBuilder = new ProcessBuilder( - Constants.FFMPEG, - "-i", inputPath, - "-vf", "format=gray,eq=contrast=1.5:brightness=0.1", - outputPath - ); - processBuilder.redirectErrorStream(true); - return processBuilder; + /** + * 生成一个临时的图片路径 + */ + public String generateOrcTempImagePath(String username) { + return commonOcrService.generateOrcTempImagePath(username); } + } diff --git a/src/main/java/com/jmal/clouddisk/ocr/TesseractService.java b/src/main/java/com/jmal/clouddisk/ocr/TesseractService.java new file mode 100644 index 00000000..f1fc6088 --- /dev/null +++ b/src/main/java/com/jmal/clouddisk/ocr/TesseractService.java @@ -0,0 +1,49 @@ +package com.jmal.clouddisk.ocr; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.util.StrUtil; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import net.sourceforge.tess4j.ITesseract; +import net.sourceforge.tess4j.Tesseract; +import org.springframework.context.annotation.Primary; +import org.springframework.stereotype.Service; + +import java.io.File; + + +@Service("tesseract") +@RequiredArgsConstructor +@Slf4j +@Primary +public class TesseractService implements IOcrService { + + private final ThreadLocal tesseractThreadLocal; + + private final CommonOcrService commonOcrService; + + @Override + public String doOCR(String imagePath, String tempImagePath) { + if (StrUtil.isBlank(imagePath)) { + return ""; + } + try { + if (StrUtil.isBlank(tempImagePath)) { + tempImagePath = commonOcrService.generateOrcTempImagePath(null); + } + // 预处理后的图片 + String preprocessedOCRImage = CommonOcrService.getPreprocessedOCRImage(imagePath, tempImagePath); + if (StrUtil.isBlank(preprocessedOCRImage)) { + return ""; + } + File imageFile = new File(preprocessedOCRImage); + ITesseract tesseract = tesseractThreadLocal.get(); + return tesseract.doOCR(imageFile); + } catch (Exception e) { + log.warn("Error while performing OCR: {}", e.getMessage(), e); + } finally { + FileUtil.del(tempImagePath); + } + return ""; + } +} diff --git a/src/main/java/com/jmal/clouddisk/repository/IAuthDAO.java b/src/main/java/com/jmal/clouddisk/repository/IAuthDAO.java index 4e032b51..4ab8d14c 100644 --- a/src/main/java/com/jmal/clouddisk/repository/IAuthDAO.java +++ b/src/main/java/com/jmal/clouddisk/repository/IAuthDAO.java @@ -44,11 +44,12 @@ public interface IAuthDAO { */ List accessTokenList(String username); - /*** + /** * 更新accessToken最近访问时间 * @param username 用户名 + * @param token token */ - void updateAccessToken(String username); + void updateAccessToken(String username, String token); /*** * 删除accessToken diff --git a/src/main/java/com/jmal/clouddisk/repository/impl/AuthDAOImpl.java b/src/main/java/com/jmal/clouddisk/repository/impl/AuthDAOImpl.java index 53f39891..29d7e3a0 100644 --- a/src/main/java/com/jmal/clouddisk/repository/impl/AuthDAOImpl.java +++ b/src/main/java/com/jmal/clouddisk/repository/impl/AuthDAOImpl.java @@ -90,9 +90,10 @@ public List accessTokenList(String username) { } @Override - public void updateAccessToken(String username) { + public void updateAccessToken(String username, String token) { Query query = new Query(); query.addCriteria(Criteria.where(USERNAME).is(username)); + query.addCriteria(Criteria.where(ACCESS_TOKEN).is(token)); Update update = new Update(); update.set("lastActiveTime", LocalDateTime.now(TimeUntils.ZONE_ID)); mongoTemplate.upsert(query, update,ACCESS_TOKEN_COLLECTION_NAME); diff --git a/src/main/java/com/jmal/clouddisk/service/Constants.java b/src/main/java/com/jmal/clouddisk/service/Constants.java index f69e21f7..439ab2c1 100644 --- a/src/main/java/com/jmal/clouddisk/service/Constants.java +++ b/src/main/java/com/jmal/clouddisk/service/Constants.java @@ -33,6 +33,8 @@ private Constants() { } public static final String FFMPEG = "ffmpeg"; + public static final String OCR_LITE_ONNX = "OcrLiteOnnx"; + public static final String HEIF_CONVERT = "heif-convert"; public static final String DOCUMENT = "document"; diff --git a/src/main/java/com/jmal/clouddisk/service/impl/CommonFileService.java b/src/main/java/com/jmal/clouddisk/service/impl/CommonFileService.java index f3bff909..e760f03b 100644 --- a/src/main/java/com/jmal/clouddisk/service/impl/CommonFileService.java +++ b/src/main/java/com/jmal/clouddisk/service/impl/CommonFileService.java @@ -403,7 +403,7 @@ private void updateOtherInfo(FileDocument fileExists, String contentType, String } } - private String getRelativePath(String username, String fileAbsolutePath, String fileName) { + public String getRelativePath(String username, String fileAbsolutePath, String fileName) { int startIndex = fileProperties.getRootDir().length() + username.length() + 1; int endIndex = fileAbsolutePath.length() - fileName.length(); if (startIndex >= endIndex) { diff --git a/src/main/java/com/jmal/clouddisk/util/FFMPEGUtils.java b/src/main/java/com/jmal/clouddisk/util/FFMPEGUtils.java index e8d52d81..48666285 100644 --- a/src/main/java/com/jmal/clouddisk/util/FFMPEGUtils.java +++ b/src/main/java/com/jmal/clouddisk/util/FFMPEGUtils.java @@ -26,13 +26,13 @@ public static void printErrorInfo(ProcessBuilder processBuilder, Process process public static void printErrorInfo(ProcessBuilder processBuilder) { // 打印命令 用空格连接 String command = String.join(" ", processBuilder.command()); - log.error("ffmpeg 执行失败, command: \r\n{}", command); + log.error("命令 执行失败, command: \r\n{}", command); } public static void printSuccessInfo(ProcessBuilder processBuilder) { // 打印命令 用空格连接 String command = String.join(" ", processBuilder.command()); - log.info("ffmpeg 执行成功, command: \r\n{}", command); + log.info("命令 执行成功, command: \r\n{}", command); } /** diff --git a/src/main/resources/file.yml b/src/main/resources/file.yml index 012e9494..2e25e042 100644 --- a/src/main/resources/file.yml +++ b/src/main/resources/file.yml @@ -29,4 +29,6 @@ file: web-dav-prefix: webDAV # ip2region.xdb path ip2region-db-path: /Users/jmal/studio/myProject/github/jmal-cloud-server/docker/ip2region.xdb + # OcrLiteOnnxModel path + ocr-lite-onnx-model-path: /Users/jmal/studio/myProject/github/jmal-cloud-server/docker/models