2025-10-31 14:38:37 +08:00
|
|
|
|
#
|
|
|
|
|
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
|
|
|
|
|
#
|
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
|
#
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
#
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
#
|
|
|
|
|
|
"""
|
|
|
|
|
|
Standalone OCR module.
|
|
|
|
|
|
|
|
|
|
|
|
This module was extracted from the RAGFlow project, and its dependencies on RAGFlow-specific modules have been removed.
|
|
|
|
|
|
It can be used directly as a standalone module.
|
|
|
|
|
|
|
|
|
|
|
|
Usage:
|
2025-11-03 10:22:28 +08:00
|
|
|
|
from ocr import OCR, SimplePdfParser
|
2025-10-31 14:38:37 +08:00
|
|
|
|
import cv2
|
|
|
|
|
|
|
|
|
|
|
|
ocr = OCR()
|
|
|
|
|
|
img = cv2.imread("image.jpg")
|
|
|
|
|
|
results = ocr(img)
|
2025-11-03 10:22:28 +08:00
|
|
|
|
|
|
|
|
|
|
parser = SimplePdfParser()
|
|
|
|
|
|
result = parser.parse_pdf("document.pdf")
|
2025-10-31 14:38:37 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
# Handle import issues: support both running this file directly and importing it as a module.
|
|
|
|
|
|
import sys
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Names exported by a wildcard import of this module.
__all__ = [
    "OCR",
    "TextDetector",
    "TextRecognizer",
    "SimplePdfParser",
]
|
|
|
|
|
|
|
2025-11-03 10:22:28 +08:00
|
|
|
|
|