Files
catonline_ai/vw-document-ai-indexer/env.yaml.example
2025-09-26 17:15:54 +08:00

43 lines
1.8 KiB
Plaintext

# Configuration file referenced by this environment file
config: "config.yaml"
# Processing settings
# Number of parallel processing jobs
njobs: 8
# Azure AI Search configuration
# NOTE(review): the key says "name" but the sample value is a full endpoint
# URL -- confirm which form the consumer expects.
search_service_name: "https://your-search-service.search.windows.net"
# Placeholder; replace with the real admin key and keep it out of version control.
search_admin_key: "your-search-admin-key"
# Azure OpenAI Embedding service
# Full embeddings request URL: deployment path plus the api-version query parameter.
embedding_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-12-01-preview"
# Placeholder; replace with the real key and keep it out of version control.
embedding_model_key: "your-openai-key"
# NOTE(review): presumably has to match the output size of the embedding
# model configured above -- confirm against the index schema.
VECTOR_DIMENSION: 1536
# Azure OpenAI version
FLAG_AOAI: "V3"
# Embedding model type: "AOAI" or "qwen3-embedding-8b"
FLAG_EMBEDDING_MODEL: "AOAI"
# Document Intelligence configuration
# Extraction method; one of "di+vision-llm", "vision-llm", "di"
extract_method: "di+vision-llm"
# Endpoint and key of the Document Intelligence resource (placeholders).
form_rec_resource: "https://your-di-service.cognitiveservices.azure.com/"
form_rec_key: "your-di-key"
# Document Intelligence features
# High resolution OCR
di-hiRes: true
# Mathematical expression detection
di-Formulas: true
# Supported file extensions, separated by semicolons
di_allow_features_ext: "pdf;jpeg;jpg;png;bmp;tiff;heif"
# Vision and captioning models
# Full chat-completions request URL: deployment path plus the api-version query parameter.
captioning_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
# Placeholder; replace with the real key and keep it out of version control.
captioning_model_key: "your-openai-key"
# Maximum images to process per document (0 = no limit)
vision_max_images: 200
# Image processing method: "openai"
vision_image_method: "openai"
# Blob storage for figures and DI results
# The sample URLs include a container path and a SAS token query string, so
# the filled-in values are credentials -- keep them out of version control.
FIGURE_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"
DI_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"
# Database configuration
# Connection URI; replace user, password, host, port and database_name.
DB_URI: "postgresql://user:password@host:port/database_name"
# Processing flags
# Enable/disable header fixing
header_fix: false