Refactor code structure for improved readability and maintainability

This commit is contained in:
ash66
2026-05-14 18:09:15 +08:00
parent 10d04c4083
commit 35cd927d02
105 changed files with 9043 additions and 7720 deletions

View File

@@ -6,10 +6,11 @@ from loguru import logger
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(PROJECT_ROOT, "backend"))
from src.services.embedding.text_chunker import RegulationChunker, TextChunk, ChunkMetadata
from src.services.embedding.bge_m3_embedder import BGEM3Embedder, EmbeddingResult
from app.services.embedding.text_chunker import RegulationChunker, TextChunk, ChunkMetadata
from app.services.embedding.bge_m3_embedder import BGEM3Embedder, EmbeddingResult
class TestRegulationChunker:
@@ -181,4 +182,4 @@ class TestBGEM3Embedder:
if __name__ == "__main__":
pytest.main([__file__, "-v"])
pytest.main([__file__, "-v"])

View File

@@ -6,11 +6,12 @@ from loguru import logger
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(PROJECT_ROOT, "backend"))
from src.services.storage.milvus_client import MilvusClient, SearchResult
from src.services.embedding.bge_m3_embedder import BGEM3Embedder
from src.config.settings import settings
from app.services.storage.milvus_client import MilvusClient, SearchResult
from app.services.embedding.bge_m3_embedder import BGEM3Embedder
from app.config.settings import settings
class TestMilvusConnection:
@@ -55,7 +56,7 @@ class TestMilvusOperations:
def test_insert_and_search(self, client):
"""测试插入和检索"""
from src.services.embedding.text_chunker import TextChunk, ChunkMetadata
from app.services.embedding.text_chunker import TextChunk, ChunkMetadata
# 创建测试数据
chunks = [
@@ -133,4 +134,4 @@ class TestEmbedding:
if __name__ == "__main__":
pytest.main([__file__, "-v"])
pytest.main([__file__, "-v"])

View File

@@ -6,11 +6,12 @@ from loguru import logger
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(PROJECT_ROOT, "backend"))
from src.services.parser.pdf_parser import PDFParser, parse_pdf_to_markdown
from src.services.parser.docx_parser import DocxParser, parse_docx_to_markdown
from src.services.parser.mineru_parser import MinerUParser, ParserOrchestrator
from app.services.parser.pdf_parser import PDFParser, parse_pdf_to_markdown
from app.services.parser.docx_parser import DocxParser, parse_docx_to_markdown
from app.services.parser.mineru_parser import MinerUParser, ParserOrchestrator
class TestPDFParser:
@@ -60,14 +61,14 @@ class TestChunker:
def test_chunker_initialization(self):
"""测试分块器初始化"""
from src.services.embedding.text_chunker import RegulationChunker
from app.services.embedding.text_chunker import RegulationChunker
chunker = RegulationChunker(chunk_size=512)
assert chunker is not None
def test_chunk_sample_text(self):
"""测试分块示例文本"""
from src.services.embedding.text_chunker import RegulationChunker
from app.services.embedding.text_chunker import RegulationChunker
sample_text = """
# 测试法规文档
@@ -106,7 +107,7 @@ class TestFullPipeline:
def test_pipeline_without_files(self):
"""测试流程初始化(无文件)"""
from src.services.document_processor import DocumentProcessor
from app.services.document_processor import DocumentProcessor
processor = DocumentProcessor()
assert processor is not None
@@ -115,4 +116,4 @@ class TestFullPipeline:
if __name__ == "__main__":
pytest.main([__file__, "-v"])
pytest.main([__file__, "-v"])

View File

@@ -17,13 +17,14 @@ import os
import sys
import time
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(PROJECT_ROOT, "backend"))
from loguru import logger
from src.config.logging import setup_logging
from src.services.document_processor import DocumentProcessor, ProcessingResult
from src.services.storage.milvus_client import MilvusClient
from src.config.settings import settings
from app.config.logging import setup_logging
from app.services.document_processor import DocumentProcessor, ProcessingResult
from app.services.storage.milvus_client import MilvusClient
from app.config.settings import settings
# 设置日志
setup_logging(level="INFO")
@@ -66,7 +67,7 @@ def verify_embedding_model():
logger.info("=" * 50)
try:
from src.services.embedding.bge_m3_embedder import BGEM3Embedder
from app.services.embedding.bge_m3_embedder import BGEM3Embedder
embedder = BGEM3Embedder()
logger.success("嵌入模型加载成功")
@@ -126,9 +127,9 @@ def verify_sample_document():
"""
try:
from src.services.embedding.text_chunker import RegulationChunker
from src.services.embedding.bge_m3_embedder import BGEM3Embedder
from src.services.storage.milvus_client import MilvusClient
from app.services.embedding.text_chunker import RegulationChunker
from app.services.embedding.bge_m3_embedder import BGEM3Embedder
from app.services.storage.milvus_client import MilvusClient
# 1. 分块
logger.info("测试分块...")
@@ -219,4 +220,4 @@ def main():
if __name__ == "__main__":
success = main()
sys.exit(0 if success else 1)
sys.exit(0 if success else 1)