Files
AIRegulation-DocAnalysis/backend/app/config/settings.py

173 lines
11 KiB
Python
Raw Normal View History

"""Backend application settings, loaded from environment variables and the project-root .env files."""
2026-05-14 15:07:34 +08:00
from pathlib import Path
2026-05-14 15:07:34 +08:00
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
2026-05-14 15:07:34 +08:00
from functools import lru_cache
# Keep configuration setup explicit so runtime behavior is easy to reason about.
ROOT_DIR = Path(__file__).resolve().parents[3]
ROOT_ENV_FILES = (
ROOT_DIR / ".env",
ROOT_DIR / ".env.development",
)
2026-05-14 15:07:34 +08:00
class Settings(BaseSettings):
    """Typed configuration for the backend service.

    Every attribute below is a setting with a built-in default. Because the
    class derives from ``BaseSettings``, values can be overridden through the
    process environment or through the dotenv files listed in
    ``ROOT_ENV_FILES``. ``.env.development`` is listed after ``.env``;
    pydantic-settings documents that later files override earlier ones.
    Keys that do not match a declared field are ignored (``extra="ignore"``).

    NOTE(review): several defaults below embed internal hosts and credentials
    (MinIO, PostgreSQL, the JWT secret). They look like development
    conveniences - confirm they are always overridden in deployed
    environments.
    """

    model_config = SettingsConfigDict(
        env_file=tuple(str(env_file) for env_file in ROOT_ENV_FILES),
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # ── Application ───────────────────────────────────────────────────────────
    app_name: str = Field(default="AI Regulations Demo", description="Application name")
    app_version: str = Field(default="0.1.0", description="应用版本")
    debug: bool = Field(default=False, description="调试模式")

    # ── Milvus connection ─────────────────────────────────────────────────────
    milvus_host: str = Field(default="6.86.80.8", description="Milvus服务地址")
    milvus_port: int = Field(default=19530, description="Milvus服务端口")
    milvus_collection: str = Field(default="regulations_dense_1024_v2", description="法规向量集合名称")
    milvus_db_name: str = Field(default="default", description="Milvus数据库名称")

    # ── Embedding API ─────────────────────────────────────────────────────────
    embedding_model: str = Field(default="text-embedding-v3", description="嵌入模型名称")
    embedding_dim: int = Field(default=1024, description="嵌入向量维度")
    embedding_api_key: str = Field(default="", description="Embedding API密钥")
    embedding_base_url: str = Field(default="http://6.86.80.4:30080/v1", description="Embedding API地址")
    embedding_timeout_seconds: int = Field(default=120, description="Embedding API超时时间(秒)")

    # ── Aliyun document parsing ───────────────────────────────────────────────
    alibaba_access_key_id: str = Field(default="", description="阿里云文档解析 Access Key ID")
    alibaba_access_key_secret: str = Field(default="", description="阿里云文档解析 Access Key Secret")
    alibaba_endpoint: str = Field(default="docmind-api.cn-hangzhou.aliyuncs.com", description="阿里云文档解析 endpoint")
    aliyun_parse_poll_interval_seconds: int = Field(default=5, description="阿里云文档解析轮询间隔(秒)")
    aliyun_parse_timeout_seconds: int = Field(default=900, description="阿里云文档解析超时时间(秒)")
    aliyun_parse_layout_step_size: int = Field(default=50, description="阿里云文档解析分页步长")
    aliyun_llm_enhancement: bool = Field(default=True, description="是否启用阿里云解析增强")
    aliyun_enhancement_mode: str = Field(default="VLM", description="阿里云解析增强模式")
    document_parse_artifact_prefix: str = Field(default="artifacts", description="解析产物对象前缀")
    parser_failure_mode: str = Field(default="fail", description="解析失败策略")

    # ── MinIO object storage ──────────────────────────────────────────────────
    # NOTE(review): access/secret key defaults are hard-coded credentials.
    minio_endpoint: str = Field(default="6.86.80.8:9000", description="MinIO服务地址")
    minio_access_key: str = Field(default="minioadmin", description="MinIO访问密钥")
    minio_secret_key: str = Field(default="minioadmin123", description="MinIO秘密密钥")
    minio_bucket: str = Field(default="upload-files", description="文档存储桶名称")
    minio_secure: bool = Field(default=False, description="是否使用HTTPS")

    # ── Redis ─────────────────────────────────────────────────────────────────
    redis_host: str = Field(default="6.86.80.8", description="Redis服务地址")
    redis_port: int = Field(default=6379, description="Redis服务端口")
    redis_password: str = Field(default="", description="Redis密码")
    redis_db: int = Field(default=0, description="Redis数据库编号")

    # ── PostgreSQL ────────────────────────────────────────────────────────────
    # NOTE(review): user/password defaults are hard-coded credentials.
    postgres_host: str = Field(default="6.86.80.8", description="PostgreSQL服务地址")
    postgres_port: int = Field(default=5432, description="PostgreSQL服务端口")
    postgres_user: str = Field(default="compliance", description="PostgreSQL用户名")
    postgres_password: str = Field(default="compliance123", description="PostgreSQL密码")
    postgres_db: str = Field(default="compliance_db", description="PostgreSQL数据库名称")

    # ── Document processing ───────────────────────────────────────────────────
    chunk_size: int = Field(default=512, description="分块大小(字符数)")
    chunk_overlap: int = Field(default=50, description="分块重叠大小")
    max_file_size_mb: int = Field(default=100, description="最大文件大小(MB)")
    document_metadata_path: str = Field(default="backend/data/documents.json", description="文档元数据存储路径")
    document_processing_metadata_path: str = Field(default="backend/data/document_processing.json", description="文档处理历史存储路径")
    parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)")
    chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)")
    document_repository_backend: str = Field(default="json", description="文档元数据存储后端 (json/postgres)")
    # When True, document processing is enqueued to Celery workers via Redis.
    # When False (default), processing runs in a FastAPI BackgroundTask in the same process —
    # no external worker needed. Switch to True only when a Celery worker is running.
    use_celery_worker: bool = Field(default=False, description="使用 Celery Worker 异步处理文档 (需要 Worker 运行中)")

    # ── Perception crawl ──────────────────────────────────────────────────────
    perception_crawl_timeout_seconds: int = Field(
        default=120, description="HTTP timeout for regulatory source crawlers."
    )
    perception_max_events_per_source: int = Field(
        default=100, description="Maximum events fetched per source per crawl run."
    )
    perception_diff_similarity_threshold: float = Field(
        default=0.85,
        description="Cosine similarity below which a paragraph is flagged as changed.",
    )

    # ── API server ────────────────────────────────────────────────────────────
    api_host: str = Field(default="0.0.0.0", description="API服务地址")
    api_port: int = Field(default=8000, description="API服务端口")

    # ── LLM (provider-independent) ────────────────────────────────────────────
    llm_provider: str = Field(default="deepseek", description="LLM提供商 (deepseek/qwen/qwen_vl)")
    llm_model: str = Field(default="deepseek-v4-flash", description="LLM模型名称")
    llm_max_tokens: int = Field(default=4096, description="LLM最大输出token数")
    llm_temperature: float = Field(default=0.7, description="LLM温度参数")

    # ── DeepSeek ──────────────────────────────────────────────────────────────
    deepseek_api_key: str = Field(default="", description="DeepSeek API密钥")
    deepseek_base_url: str = Field(default="http://6.86.80.4:30080/v1", description="DeepSeek API地址")
    deepseek_model: str = Field(default="deepseek-v4-flash", description="DeepSeek模型")

    # ── Qwen ──────────────────────────────────────────────────────────────────
    qwen_api_key: str = Field(default="", description="Qwen API密钥")
    qwen_base_url: str = Field(default="http://6.86.80.4:30080/v1", description="Qwen API地址")
    qwen_model: str = Field(default="qwen3.5-flash", description="Qwen文本模型")
    qwen_vl_model: str = Field(default="qwen3-vl-plus", description="Qwen视觉模型")

    # ── RAG retrieval ─────────────────────────────────────────────────────────
    rag_top_k: int = Field(default=5, description="检索召回数量")
    # NOTE(review): the description below has an unbalanced closing parenthesis;
    # left unchanged because it is runtime text.
    rag_retrieval_top_k: int = Field(default=20, description="精排前召回候选数量reranker 启用时生效)")
    rag_max_context_tokens: int = Field(default=2000, description="RAG最大上下文token数")
    rag_summary_max_tokens: int = Field(default=10240, description="文档摘要最大token数")
    rag_skills_max_tokens: int = Field(default=2048, description="技能类 RAG 最大 token 数")

    # ── Reranker ──────────────────────────────────────────────────────────────
    reranker_enabled: bool = Field(default=False, description="是否启用 Cross-Encoder 精排")
    reranker_base_url: str = Field(default="", description="Reranker API 地址")
    reranker_model: str = Field(default="BAAI/bge-reranker-v2-m3", description="Reranker 模型名称")
    reranker_api_key: str = Field(default="", description="Reranker API 密钥")
    reranker_top_k: int = Field(default=5, description="精排后保留的最终结果数量")

    # ── Milvus index ──────────────────────────────────────────────────────────
    milvus_index_type: str = Field(default="IVF_FLAT", description="Milvus索引类型")
    milvus_nlist: int = Field(default=128, description="Milvus nlist参数")
    milvus_nprobe: int = Field(default=16, description="Milvus nprobe参数")

    # ── Session ───────────────────────────────────────────────────────────────
    session_max_sessions: int = Field(default=100, description="最大会话数量")
    session_timeout_minutes: int = Field(default=30, description="会话超时时间(分钟)")
    session_backend: str = Field(
        default="memory",
        description="会话存储后端 (memory | redis)。redis 需要 Redis 可用。",
    )

    # ── Auth ──────────────────────────────────────────────────────────────────
    # Generate a strong secret: python -c "import secrets; print(secrets.token_hex(32))"
    # NOTE(review): the default below is a publicly visible placeholder; tokens
    # signed with it are forgeable if it is not overridden.
    auth_secret_key: str = Field(
        default="change-me-in-production-must-be-32-or-more-characters-long",
        description="JWT signing secret. MUST be changed in production.",
    )
    auth_algorithm: str = Field(default="HS256", description="JWT signing algorithm.")
    auth_token_expire_minutes: int = Field(default=480, description="JWT TTL in minutes (default 8 hours).")
    auth_enabled: bool = Field(default=True, description="Set False to bypass auth (development only).")

    # ── CORS ──────────────────────────────────────────────────────────────────
    cors_allow_origins: str = Field(
        default="http://localhost:5173",
        description="Comma-separated allowed CORS origins. Never use * in production.",
    )
2026-05-14 15:07:34 +08:00
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance.

    The first call builds ``Settings()``; ``lru_cache`` memoizes that result,
    so every later call returns the same object without building a new one.
    Call ``get_settings.cache_clear()`` to force the next call to build a
    fresh instance.

    Returns:
        The cached ``Settings`` object.
    """
    return Settings()
# Module-level settings object, created once at import time. It is the same
# cached instance that later ``get_settings()`` calls return.
settings = get_settings()