Files
AIRegulation-DocAnalysis/backend/app/core/config.py
ash66 c22b03dc07 feat: Migrate document parsing to Aliyun and update embedding configurations
- Updated LocalDocumentParser to include raw_layouts and artifact_prefix from settings.
- Added new documents with failure reasons and metadata to documents.json for better error tracking.
- Created a new documentation file detailing the Aliyun ingest implementation process.
- Updated RFC to reflect changes in the parsing backend and embedding dimensions.
- Modified tests to accommodate the new embedding dimension of 1024 and updated parser and chunk builder assertions.
- Verified migration configurations to ensure correct settings for embedding model and backend.
2026-05-18 22:30:28 +08:00

55 lines
1.5 KiB
Python

"""Legacy-compatible config used by older utility modules."""
from pathlib import Path
from pydantic_settings import BaseSettings, SettingsConfigDict
# Keep legacy settings aligned with the root-level env loading rules.
# parents[0] is this file's directory, so parents[3] sits three levels above it.
ROOT_DIR = Path(__file__).resolve().parents[3]
# Paths are stored as plain strings; the base file is listed first and the
# development file second.
ROOT_ENV_FILES = (
    str(ROOT_DIR / ".env"),
    str(ROOT_DIR / ".env.development"),
)
class Settings(BaseSettings):
    """Legacy settings model backed by the root-level env files.

    The env files listed in ROOT_ENV_FILES are read as UTF-8, names are
    matched case-insensitively, and unknown keys are ignored. Every field
    falls back to the default declared here when no value is supplied.
    """

    model_config = SettingsConfigDict(
        env_file=ROOT_ENV_FILES,
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # --- DashScope API ---
    dashscope_api_key: str = ""

    # --- Milvus ---
    milvus_host: str = "localhost"
    milvus_port: int = 19530
    milvus_collection: str = "regulations_dense_1024_v1"

    # --- LLM / embedding ---
    # Defaults aligned with the migrated backend path.
    llm_model: str = "qwen-max"
    embedding_model: str = "text-embedding-v3"
    embedding_dim: int = 1024

    # --- Retrieval ---
    # Legacy workflow compatibility only.
    vector_top_k: int = 10
    final_top_k: int = 5

    # --- Chunking ---
    # Legacy local chunking compatibility only; main ingest now uses
    # Aliyun vector_chunks.
    chunk_size: int = 800
    chunk_overlap: int = 50

    # --- Service ---
    api_host: str = "0.0.0.0"
    api_port: int = 8000

    # --- Legacy aliases ---
    # Retained for old utility modules. regulations_collection carries the
    # same default string as milvus_collection above.
    regulations_collection: str = "regulations_dense_1024_v1"
    compliance_collection: str = "compliance_cache"
# Preserve the legacy module API while keeping env resolution centralized at the repo root.
# The instance is built once, when this module is first imported, so older utility
# modules can keep importing `settings` from here.
settings = Settings()