Fix SSE route dependency and align architecture docs

This commit is contained in:
ash66
2026-05-18 16:32:42 +08:00
parent 86b9ac806a
commit 3f69cad404
149 changed files with 4786 additions and 5957 deletions

View File

@@ -1,4 +1,8 @@
"""Initialize the app.utils package."""
from .chunking import TextChunker, chunker
from .logger import logger, setup_logging
# Keep package boundaries explicit so backend imports stay predictable.
__all__ = ["TextChunker", "chunker", "logger", "setup_logging"]

View File

@@ -1,19 +1,25 @@
"""Provide utility helpers for chunking."""
import re
from typing import List
from app.core.config import settings
# Keep module behavior explicit so the backend flow stays easy to audit.
class TextChunker:
"""Represent the Text Chunker type."""
def __init__(
    self,
    chunk_size: int = settings.chunk_size,
    chunk_overlap: int = settings.chunk_overlap,
):
    """Store the chunking parameters on the new instance.

    Args:
        chunk_size: Value kept as ``self.chunk_size``. The default is
            ``settings.chunk_size`` as read when this method is defined.
        chunk_overlap: Value kept as ``self.chunk_overlap``. The default
            is ``settings.chunk_overlap`` as read when this method is
            defined.
    """
    self.chunk_size, self.chunk_overlap = chunk_size, chunk_overlap
def chunk_by_clause(self, text: str) -> List[dict]:
"""按条款边界分块(适用于法规文档)"""
"""Handle chunk by clause for the Text Chunker instance."""
clause_pattern = r"(第[一二三四五六七八九十百]+条)"
parts = re.split(clause_pattern, text)
@@ -46,7 +52,7 @@ class TextChunker:
return chunks
def chunk_by_size(self, text: str) -> List[dict]:
"""按固定大小分块"""
"""Handle chunk by size for the Text Chunker instance."""
chunks = []
start = 0
chunk_index = 0
@@ -69,7 +75,7 @@ class TextChunker:
return chunks
def estimate_tokens(self, text: str) -> int:
    """Return a rough token-count estimate for *text*.

    Characters above U+00FF (which covers CJK text, among others) are
    weighted at 1.5 characters per token; every other character is
    weighted at 4 characters per token. The sum is truncated to an int.

    Args:
        text: The string to measure.

    Returns:
        The estimated number of tokens; 0 for an empty string.
    """
    # The pattern matches every code point outside the Latin-1 range, so
    # "wide" is broader than Chinese alone.
    wide_chars = len(re.findall(r"[^\x00-\xff]", text))
    narrow_chars = len(text) - wide_chars
    return int(wide_chars / 1.5 + narrow_chars / 4)

View File

@@ -1,9 +1,13 @@
"""Provide utility helpers for logger."""
import logging
import sys
# Keep module behavior explicit so the backend flow stays easy to audit.
def setup_logging() -> logging.Logger:
"""配置日志"""
"""Handle setup logging."""
logger = logging.getLogger("app")
logger.setLevel(logging.INFO)