优化OCR解析

This commit is contained in:
2025-11-03 10:22:28 +08:00
parent 4603a86df4
commit 3e58c3d0e9
9 changed files with 581 additions and 30 deletions

View File

@@ -57,7 +57,7 @@ class ParseResponse(BaseModel):
data: Optional[dict] = None
-@router.get(
+@ocr_router.get(
"/health",
summary="健康检查",
description="检查OCR服务的健康状态和配置信息",
@@ -79,7 +79,7 @@ async def health_check():
}
-@router.post(
+@ocr_router.post(
"/parse",
response_model=ParseResponse,
summary="上传并解析PDF文件",
@@ -165,7 +165,7 @@ async def parse_pdf_endpoint(
logger.warning(f"Failed to delete temp file {temp_file}: {e}")
-@router.post(
+@ocr_router.post(
"/parse/bytes",
response_model=ParseResponse,
summary="通过二进制数据解析PDF",
@@ -244,7 +244,7 @@ async def parse_pdf_bytes(
logger.warning(f"Failed to delete temp file {temp_file}: {e}")
-@router.post(
+@ocr_router.post(
"/parse/path",
response_model=ParseResponse,
summary="通过文件路径解析PDF",
@@ -315,7 +315,7 @@ async def parse_pdf_path(
)
-@router.post(
+@ocr_router.post(
"/parse_into_bboxes",
summary="解析PDF并返回边界框",
description="解析PDF文件并返回文本边界框信息用于文档结构化处理",
@@ -414,7 +414,7 @@ class RemoveTagResponse(BaseModel):
text: Optional[str] = None
-@router.post(
+@ocr_router.post(
"/remove_tag",
response_model=RemoveTagResponse,
summary="移除文本中的位置标签",
@@ -464,7 +464,7 @@ class ExtractPositionsResponse(BaseModel):
positions: Optional[list] = None
-@router.post(
+@ocr_router.post(
"/extract_positions",
response_model=ExtractPositionsResponse,
summary="从文本中提取位置信息",