Refactor code structure for improved readability and maintainability
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
# src/services/parser/__init__.py
|
||||
"""文档解析服务"""
|
||||
|
||||
from .pdf_parser import PDFParser
|
||||
from .docx_parser import DocxParser
|
||||
|
||||
__all__ = ["PDFParser", "DocxParser"]
|
||||
__all__ = ["PDFParser", "DocxParser"]
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# src/services/parser/docx_parser.py
|
||||
"""Word文档解析 - 使用python-docx"""
|
||||
|
||||
from docx import Document
|
||||
@@ -284,4 +283,4 @@ def parse_docx(file_path: str) -> DocxDocumentContent:
|
||||
def parse_docx_to_markdown(file_path: str) -> str:
|
||||
"""便捷函数:解析Word并返回Markdown"""
|
||||
parser = DocxParser()
|
||||
return parser.parse_to_markdown(file_path)
|
||||
return parser.parse_to_markdown(file_path)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# src/services/parser/mineru_parser.py
|
||||
"""MinerU多模态PDF解析 - 版面感知解析"""
|
||||
|
||||
from typing import Optional, Dict
|
||||
@@ -201,4 +200,4 @@ def parse_with_mineru(file_path: str) -> MinerUResult:
|
||||
def parse_pdf_smart(file_path: str) -> str:
|
||||
"""便捷函数:智能解析PDF(自动选择最佳解析器)"""
|
||||
orchestrator = ParserOrchestrator()
|
||||
return orchestrator.parse_pdf(file_path)
|
||||
return orchestrator.parse_pdf(file_path)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# src/services/parser/pdf_parser.py
|
||||
"""PDF文档解析 - 使用PyMuPDF基础解析"""
|
||||
|
||||
import fitz # PyMuPDF
|
||||
@@ -265,4 +264,4 @@ def parse_pdf(file_path: str, **kwargs) -> PDFDocumentContent:
|
||||
def parse_pdf_to_markdown(file_path: str) -> str:
|
||||
"""便捷函数:解析PDF并返回Markdown"""
|
||||
parser = PDFParser()
|
||||
return parser.parse_to_markdown(file_path)
|
||||
return parser.parse_to_markdown(file_path)
|
||||
|
||||
Reference in New Issue
Block a user