Files
AIRegulation-DocAnalysis/.env
ash66 c22b03dc07 feat: Migrate document parsing to Aliyun and update embedding configurations
- Updated LocalDocumentParser to include raw_layouts and artifact_prefix from settings.
- Added new documents with failure reasons and metadata to documents.json for better error tracking.
- Created a new documentation file detailing the Aliyun ingest implementation process.
- Updated RFC to reflect changes in the parsing backend and embedding dimensions.
- Modified tests to accommodate the new embedding dimension of 1024 and updated parser and chunk builder assertions.
- Verified migration configurations to ensure correct settings for embedding model and backend.
2026-05-18 22:30:28 +08:00

88 lines
2.3 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Environment variable configuration - targets already-provisioned database services
# AI + Compliance Intelligence Hub
# ===== Application settings =====
# APP_NAME is a runtime value and is intentionally kept in Chinese.
APP_NAME=AI+合规智能中枢
APP_VERSION=0.1.0
DEBUG=false
# ===== Milvus vector database settings (existing service) =====
MILVUS_HOST=localhost
MILVUS_PORT=19530
# NOTE(review): the collection name embeds "1024", which looks tied to
# EMBEDDING_DIM=1024 in this file - presumably the two must change together; confirm.
MILVUS_COLLECTION=regulations_dense_1024_v1
MILVUS_DB_NAME=default
# ===== MinIO object storage settings (existing service) =====
MINIO_ENDPOINT=localhost:9000
# NOTE(review): access key and secret key are identical and look like MinIO's
# stock default credentials - confirm this file is never used outside a local
# sandbox, and prefer injecting real credentials from outside version control.
MINIO_ACCESS_KEY=minioadmin
MINIO_SECRET_KEY=minioadmin
MINIO_BUCKET=compliance-docs
# Presumably selects TLS for the MinIO connection; false matches the plain
# localhost:9000 endpoint above - TODO confirm against the client code.
MINIO_SECURE=false
# ===== Redis settings (existing service) =====
REDIS_HOST=localhost
REDIS_PORT=6379
# NOTE(review): a real password is committed to version control here. It is
# left unchanged because the target is a pre-existing localhost service, but it
# should be moved to an untracked override or secret store and rotated.
REDIS_PASSWORD=redis@123
REDIS_DB=0
# ===== PostgreSQL settings (existing service) =====
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_USER=postgresql
# NOTE(review): a real password is committed to version control here. It is
# left unchanged because the target is a pre-existing localhost service, but it
# should be moved to an untracked override or secret store and rotated.
POSTGRES_PASSWORD=postgresql123456
POSTGRES_DB=compliance_db
# ===== Embedding model settings =====
EMBEDDING_MODEL=text-embedding-v3
# Vector dimension produced by the embedding model.
# NOTE(review): MILVUS_COLLECTION in this file embeds the same number (1024);
# presumably the two must be kept in sync - confirm.
EMBEDDING_DIM=1024
# SECURITY: a live API key was previously committed on this line. It must be
# treated as compromised: revoke it at the provider and issue a new one.
# The value is intentionally blank in the tracked file; supply it through the
# process environment, an untracked local override, or a secret manager.
EMBEDDING_API_KEY=
# NOTE(review): this endpoint is plain HTTP on a raw IP address, so the API key
# travels unencrypted - switch to an HTTPS endpoint if the gateway offers one.
EMBEDDING_BASE_URL=http://6.86.80.4:30080/v1
EMBEDDING_TIMEOUT_SECONDS=120
# ===== Document processing settings =====
# NOTE(review): the unit of CHUNK_SIZE / CHUNK_OVERLAP (characters vs. tokens)
# is not stated in this file - confirm against the chunk builder.
CHUNK_SIZE=512
CHUNK_OVERLAP=50
# Upload size limit; the name indicates megabytes.
MAX_FILE_SIZE_MB=100
# ===== API settings =====
# NOTE(review): 0.0.0.0 presumably makes the API listen on every network
# interface - confirm this is intended for the deployment target.
API_HOST=0.0.0.0
API_PORT=8000
# ===== LLM settings =====
# LLM provider selection: qwen / deepseek / qwen_vl
LLM_PROVIDER=deepseek
# NOTE(review): this duplicates the value of DEEPSEEK_MODEL in this file; which
# of the two the application actually reads is not visible here - confirm.
LLM_MODEL=deepseek-v4-flash
LLM_MAX_TOKENS=4096
LLM_TEMPERATURE=0.7
# ===== Qwen API settings (Alibaba Cloud DashScope) =====
# Official keys are issued at https://dashscope.console.aliyun.com/
# NOTE(review): QWEN_BASE_URL below does not point at DashScope; it targets a
# gateway on a raw IP over plain HTTP. Confirm which service issues the key for
# that gateway, and prefer an HTTPS endpoint so the key is not sent in cleartext.
# SECURITY: a live API key was previously committed on the next line. It must
# be treated as compromised: revoke it and issue a new one. The value is
# intentionally blank in the tracked file; supply it through the process
# environment, an untracked local override, or a secret manager.
QWEN_API_KEY=
QWEN_BASE_URL=http://6.86.80.4:30080/v1
QWEN_MODEL=qwen3.6-plus
QWEN_VL_MODEL=qwen3-vl-plus
# ===== DeepSeek API settings =====
# Official keys are issued at https://platform.deepseek.com/
# NOTE(review): DEEPSEEK_BASE_URL below does not point at the DeepSeek
# platform; it targets a gateway on a raw IP over plain HTTP. Confirm which
# service issues the key for that gateway, and prefer an HTTPS endpoint so the
# key is not sent in cleartext.
# SECURITY: a live API key was previously committed on the next line. It must
# be treated as compromised: revoke it and issue a new one. The value is
# intentionally blank in the tracked file; supply it through the process
# environment, an untracked local override, or a secret manager.
DEEPSEEK_API_KEY=
DEEPSEEK_BASE_URL=http://6.86.80.4:30080/v1
DEEPSEEK_MODEL=deepseek-v4-flash
# ===== RAG settings =====
# Number of retrieved items to keep (top-k).
RAG_TOP_K=10
# Token budgets, per the variable names; exactly where each limit is enforced
# is not visible in this file - confirm against the RAG pipeline.
RAG_MAX_CONTEXT_TOKENS=4000
RAG_SUMMARY_MAX_TOKENS=1024
RAG_SKILLS_MAX_TOKENS=2048
# ===== Alibaba Cloud document parsing =====
# SECURITY: a live AccessKey ID / AccessKey Secret pair was previously
# committed on the next two lines. These are cloud account credentials and must
# be treated as compromised: disable and delete the pair in the Alibaba Cloud
# RAM console, then issue a replacement scoped to the minimum permissions
# required. The values are intentionally blank in the tracked file; supply them
# through the process environment, an untracked local override, or a secret
# manager.
ALIBABA_ACCESS_KEY_ID=
ALIBABA_ACCESS_KEY_SECRET=
# Document parsing API host (cn-hangzhou region, per the hostname).
ALIBABA_ENDPOINT=docmind-api.cn-hangzhou.aliyuncs.com
# Polling interval and overall timeout for a parse job, in seconds (per the names).
ALIYUN_PARSE_POLL_INTERVAL_SECONDS=5
ALIYUN_PARSE_TIMEOUT_SECONDS=900
# NOTE(review): presumably the page size used when fetching layout results -
# confirm against the parser client.
ALIYUN_PARSE_LAYOUT_STEP_SIZE=50
ALIYUN_LLM_ENHANCEMENT=true
ALIYUN_ENHANCEMENT_MODE=VLM
# Prefix under which parse artifacts are stored.
DOCUMENT_PARSE_ARTIFACT_PREFIX=artifacts
# NOTE(review): "fail" presumably surfaces parser errors rather than falling
# back to another parser - confirm the accepted values in the settings code.
PARSER_FAILURE_MODE=fail