Refactor code structure for improved readability and maintainability

This commit is contained in:
ash66
2026-05-14 18:09:15 +08:00
parent 10d04c4083
commit 35cd927d02
105 changed files with 9043 additions and 7720 deletions

View File

@@ -1,7 +1,6 @@
# src/services/parser/__init__.py
"""Document parsing service."""
from .pdf_parser import PDFParser
from .docx_parser import DocxParser

# Public API of the package: only the two parser classes imported above.
__all__ = ["PDFParser", "DocxParser"]

View File

@@ -1,4 +1,3 @@
# src/services/parser/docx_parser.py
"""Word文档解析 - 使用python-docx"""
from docx import Document
@@ -284,4 +283,4 @@ def parse_docx(file_path: str) -> DocxDocumentContent:
def parse_docx_to_markdown(file_path: str) -> str:
    """Convenience function: parse a Word document and return Markdown.

    Creates a fresh ``DocxParser`` and delegates to its
    ``parse_to_markdown`` method.

    Args:
        file_path: Path of the Word document, passed through unchanged.

    Returns:
        Whatever ``DocxParser.parse_to_markdown`` returns for ``file_path``
        (annotated as ``str``).
    """
    parser = DocxParser()
    return parser.parse_to_markdown(file_path)

View File

@@ -1,4 +1,3 @@
# src/services/parser/mineru_parser.py
"""MinerU多模态PDF解析 - 版面感知解析"""
from typing import Optional, Dict
@@ -201,4 +200,4 @@ def parse_with_mineru(file_path: str) -> MinerUResult:
def parse_pdf_smart(file_path: str) -> str:
    """Convenience function: parse a PDF through ``ParserOrchestrator``.

    Creates a fresh ``ParserOrchestrator`` and delegates to its
    ``parse_pdf`` method.

    NOTE(review): the original docstring says the best parser is selected
    automatically; that selection logic lives in ``ParserOrchestrator`` and
    is not visible here -- confirm against its implementation.

    Args:
        file_path: Path of the PDF file, passed through unchanged.

    Returns:
        Whatever ``ParserOrchestrator.parse_pdf`` returns for ``file_path``
        (annotated as ``str``).
    """
    orchestrator = ParserOrchestrator()
    return orchestrator.parse_pdf(file_path)

View File

@@ -1,4 +1,3 @@
# src/services/parser/pdf_parser.py
"""PDF文档解析 - 使用PyMuPDF基础解析"""
import fitz # PyMuPDF
@@ -265,4 +264,4 @@ def parse_pdf(file_path: str, **kwargs) -> PDFDocumentContent:
def parse_pdf_to_markdown(file_path: str) -> str:
    """Convenience function: parse a PDF and return Markdown.

    Creates a fresh ``PDFParser`` and delegates to its
    ``parse_to_markdown`` method.

    Args:
        file_path: Path of the PDF file, passed through unchanged.

    Returns:
        Whatever ``PDFParser.parse_to_markdown`` returns for ``file_path``
        (annotated as ``str``).
    """
    parser = PDFParser()
    return parser.parse_to_markdown(file_path)