---
# Sample configuration. Every endpoint, key, SAS token and password below is
# a placeholder: substitute real values locally and keep the filled-in file
# out of version control.
# NOTE(review): key names mix snake_case, UPPER_CASE and kebab/camel forms
# (e.g. di-hiRes); they are left untouched because the consumer presumably
# looks them up by these exact names - confirm before renaming any of them.

# Configuration file reference
config: config.yaml

# Processing settings
njobs: 8  # Number of parallel processing jobs

# Azure AI Search configuration
# NOTE(review): despite the key name, the sample value for search_service_name
# is a full endpoint URL - confirm which form the consumer expects.
search_service_name: "https://your-search-service.search.windows.net"
search_admin_key: "your-search-admin-key"

# Azure OpenAI Embedding service
embedding_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-12-01-preview"
embedding_model_key: "your-openai-key"
VECTOR_DIMENSION: 1536
FLAG_AOAI: "V3"  # Azure OpenAI version
FLAG_EMBEDDING_MODEL: "AOAI"  # Embedding model type: "AOAI" or "qwen3-embedding-8b"

# Document Intelligence configuration
extract_method: "di+vision-llm"  # Extraction method: "di+vision-llm", "vision-llm", "di"
form_rec_resource: "https://your-di-service.cognitiveservices.azure.com/"
form_rec_key: "your-di-key"

# Document Intelligence features
di-hiRes: true  # High resolution OCR
di-Formulas: true  # Mathematical expression detection
di_allow_features_ext: "pdf;jpeg;jpg;png;bmp;tiff;heif"  # Supported file extensions

# Vision and captioning models
captioning_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
captioning_model_key: "your-openai-key"
vision_max_images: 200  # Maximum images to process per document (0 = no limit)
vision_image_method: "openai"  # Image processing method: "openai"

# Blob storage for figures and DI results
FIGURE_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"
DI_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"

# Database configuration
DB_URI: "postgresql://user:password@host:port/database_name"

# Processing flags
header_fix: false  # Enable/disable header fixing