Files
AIRegulation-DocAnalysis/backend/app/config/settings.py
wangwei 9fea9c6a53 1. Add 登陆功能
2. 调整字体大小
3. 新增部分功能
2026-06-05 18:00:31 +08:00

161 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Configure backend settings for the backend application."""
from pathlib import Path
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from functools import lru_cache
# Three directory levels above the folder that holds this module. With the
# module at backend/app/config/settings.py that is the repository root.
ROOT_DIR = Path(__file__).resolve().parents[3]

# Dotenv files handed to Settings via ``env_file``. pydantic-settings loads
# them in order, so a value in ``.env.development`` overrides the same key
# in ``.env``.
ROOT_ENV_FILES = tuple(ROOT_DIR / name for name in (".env", ".env.development"))
class Settings(BaseSettings):
    """Typed runtime configuration for the backend.

    Every field declares a default that can be overridden through the
    process environment or through the dotenv files listed in
    ``ROOT_ENV_FILES``. Keys found there that do not correspond to a field
    are ignored (``extra="ignore"``) instead of raising a validation error.
    """

    model_config = SettingsConfigDict(
        env_file=tuple(map(str, ROOT_ENV_FILES)),
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # -- Application ---------------------------------------------------------
    app_name: str = Field(default="AI Regulations Demo", description="Application name")
    app_version: str = Field(default="0.1.0", description="应用版本")
    debug: bool = Field(default=False, description="调试模式")

    # -- Milvus connection ---------------------------------------------------
    milvus_host: str = Field(default="6.86.80.8", description="Milvus服务地址")
    milvus_port: int = Field(default=19530, description="Milvus服务端口")
    milvus_collection: str = Field(
        default="regulations_dense_1024_v2",
        description="法规向量集合名称",
    )
    milvus_db_name: str = Field(default="default", description="Milvus数据库名称")

    # -- Embedding service ---------------------------------------------------
    embedding_model: str = Field(default="text-embedding-v3", description="嵌入模型名称")
    embedding_dim: int = Field(default=1024, description="嵌入向量维度")
    embedding_api_key: str = Field(default="", description="Embedding API密钥")
    embedding_base_url: str = Field(
        default="http://6.86.80.4:30080/v1",
        description="Embedding API地址",
    )
    embedding_timeout_seconds: int = Field(
        default=120,
        description="Embedding API超时时间(秒)",
    )

    # -- Aliyun document parsing ---------------------------------------------
    alibaba_access_key_id: str = Field(
        default="",
        description="阿里云文档解析 Access Key ID",
    )
    alibaba_access_key_secret: str = Field(
        default="",
        description="阿里云文档解析 Access Key Secret",
    )
    alibaba_endpoint: str = Field(
        default="docmind-api.cn-hangzhou.aliyuncs.com",
        description="阿里云文档解析 endpoint",
    )
    aliyun_parse_poll_interval_seconds: int = Field(
        default=5,
        description="阿里云文档解析轮询间隔(秒)",
    )
    aliyun_parse_timeout_seconds: int = Field(
        default=900,
        description="阿里云文档解析超时时间(秒)",
    )
    aliyun_parse_layout_step_size: int = Field(
        default=50,
        description="阿里云文档解析分页步长",
    )
    aliyun_llm_enhancement: bool = Field(
        default=True,
        description="是否启用阿里云解析增强",
    )
    aliyun_enhancement_mode: str = Field(
        default="VLM",
        description="阿里云解析增强模式",
    )
    document_parse_artifact_prefix: str = Field(
        default="artifacts",
        description="解析产物对象前缀",
    )
    parser_failure_mode: str = Field(default="fail", description="解析失败策略")

    # -- MinIO object storage ------------------------------------------------
    # NOTE(review): the access/secret key defaults below look like real
    # credentials committed to source; override them via the environment
    # anywhere other than a local sandbox.
    minio_endpoint: str = Field(default="6.86.80.8:9000", description="MinIO服务地址")
    minio_access_key: str = Field(default="minioadmin", description="MinIO访问密钥")
    minio_secret_key: str = Field(default="minioadmin123", description="MinIO秘密密钥")
    minio_bucket: str = Field(default="upload-files", description="文档存储桶名称")
    minio_secure: bool = Field(default=False, description="是否使用HTTPS")

    # -- Redis ---------------------------------------------------------------
    redis_host: str = Field(default="6.86.80.8", description="Redis服务地址")
    redis_port: int = Field(default=6379, description="Redis服务端口")
    redis_password: str = Field(default="", description="Redis密码")
    redis_db: int = Field(default=0, description="Redis数据库编号")

    # -- PostgreSQL ----------------------------------------------------------
    # NOTE(review): as with MinIO, the user/password defaults are embedded in
    # source; override them via the environment outside local development.
    postgres_host: str = Field(default="6.86.80.8", description="PostgreSQL服务地址")
    postgres_port: int = Field(default=5432, description="PostgreSQL服务端口")
    postgres_user: str = Field(default="compliance", description="PostgreSQL用户名")
    postgres_password: str = Field(
        default="compliance123",
        description="PostgreSQL密码",
    )
    postgres_db: str = Field(
        default="compliance_db",
        description="PostgreSQL数据库名称",
    )

    # -- Document ingestion: chunking, limits, storage and backends ----------
    chunk_size: int = Field(default=512, description="分块大小(字符数)")
    chunk_overlap: int = Field(default=50, description="分块重叠大小")
    max_file_size_mb: int = Field(default=100, description="最大文件大小(MB)")
    document_metadata_path: str = Field(
        default="backend/data/documents.json",
        description="文档元数据存储路径",
    )
    document_processing_metadata_path: str = Field(
        default="backend/data/document_processing.json",
        description="文档处理历史存储路径",
    )
    parser_backend: str = Field(default="aliyun", description="解析后端(local/aliyun)")
    chunk_backend: str = Field(default="aliyun", description="分块后端(local/aliyun)")
    document_repository_backend: str = Field(
        default="json",
        description="文档元数据存储后端 (json/postgres)",
    )

    # Dispatch mode for document processing. True enqueues jobs to Celery
    # workers via Redis; False (the default) runs them in a FastAPI
    # BackgroundTask inside the API process, so no external worker is needed.
    # Enable only when a Celery worker is actually running.
    use_celery_worker: bool = Field(
        default=False,
        description="使用 Celery Worker 异步处理文档 (需要 Worker 运行中)",
    )

    # -- API server ----------------------------------------------------------
    api_host: str = Field(default="0.0.0.0", description="API服务地址")
    api_port: int = Field(default=8000, description="API服务端口")

    # -- LLM selection and generation parameters -----------------------------
    llm_provider: str = Field(
        default="deepseek",
        description="LLM提供商 (deepseek/qwen/qwen_vl)",
    )
    llm_model: str = Field(default="deepseek-v4-flash", description="LLM模型名称")
    llm_max_tokens: int = Field(default=4096, description="LLM最大输出token数")
    llm_temperature: float = Field(default=0.7, description="LLM温度参数")

    # -- DeepSeek provider ---------------------------------------------------
    deepseek_api_key: str = Field(default="", description="DeepSeek API密钥")
    deepseek_base_url: str = Field(
        default="http://6.86.80.4:30080/v1",
        description="DeepSeek API地址",
    )
    deepseek_model: str = Field(
        default="deepseek-v4-flash",
        description="DeepSeek模型",
    )

    # -- Qwen provider -------------------------------------------------------
    qwen_api_key: str = Field(default="", description="Qwen API密钥")
    qwen_base_url: str = Field(
        default="http://6.86.80.4:30080/v1",
        description="Qwen API地址",
    )
    qwen_model: str = Field(default="qwen3.5-flash", description="Qwen文本模型")
    qwen_vl_model: str = Field(default="qwen3-vl-plus", description="Qwen视觉模型")

    # -- RAG retrieval and reranking -----------------------------------------
    rag_top_k: int = Field(default=5, description="检索召回数量")
    # NOTE(review): this description appears to be missing its opening
    # parenthesis; the string is left untouched because it is runtime metadata.
    rag_retrieval_top_k: int = Field(
        default=20,
        description="精排前召回候选数量reranker 启用时生效)",
    )
    rag_max_context_tokens: int = Field(
        default=2000,
        description="RAG最大上下文token数",
    )
    rag_summary_max_tokens: int = Field(
        default=10240,
        description="文档摘要最大token数",
    )
    rag_skills_max_tokens: int = Field(
        default=2048,
        description="技能类 RAG 最大 token 数",
    )
    reranker_enabled: bool = Field(
        default=False,
        description="是否启用 Cross-Encoder 精排",
    )
    reranker_base_url: str = Field(default="", description="Reranker API 地址")
    reranker_model: str = Field(
        default="BAAI/bge-reranker-v2-m3",
        description="Reranker 模型名称",
    )
    reranker_api_key: str = Field(default="", description="Reranker API 密钥")
    reranker_top_k: int = Field(default=5, description="精排后保留的最终结果数量")

    # -- Milvus index parameters ---------------------------------------------
    milvus_index_type: str = Field(default="IVF_FLAT", description="Milvus索引类型")
    milvus_nlist: int = Field(default=128, description="Milvus nlist参数")
    milvus_nprobe: int = Field(default=16, description="Milvus nprobe参数")

    # -- Chat sessions -------------------------------------------------------
    session_max_sessions: int = Field(default=100, description="最大会话数量")
    session_timeout_minutes: int = Field(
        default=30,
        description="会话超时时间(分钟)",
    )
    session_backend: str = Field(
        default="memory",
        description="会话存储后端 (memory | redis)。redis 需要 Redis 可用。",
    )

    # -- Authentication (JWT) ------------------------------------------------
    # The default secret is a placeholder. Generate a strong one with:
    #   python -c "import secrets; print(secrets.token_hex(32))"
    auth_secret_key: str = Field(
        default="change-me-in-production-must-be-32-or-more-characters-long",
        description="JWT signing secret. MUST be changed in production.",
    )
    auth_algorithm: str = Field(default="HS256", description="JWT signing algorithm.")
    auth_token_expire_minutes: int = Field(
        default=480,
        description="JWT TTL in minutes (default 8 hours).",
    )
    auth_enabled: bool = Field(
        default=True,
        description="Set False to bypass auth (development only).",
    )

    # -- CORS ----------------------------------------------------------------
    cors_allow_origins: str = Field(
        default="http://localhost:5173",
        description="Comma-separated allowed CORS origins. Never use * in production.",
    )
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance.

    The function takes no arguments and is wrapped in ``lru_cache``, so
    ``Settings`` is constructed on the first call only and the same object
    is handed back on every later call.
    """
    loaded = Settings()
    return loaded


# Built once at import time so other modules can import ``settings`` directly
# instead of calling get_settings() themselves.
settings = get_settings()