init
This commit is contained in:
43
vw-document-ai-indexer/env.yaml.example
Normal file
43
vw-document-ai-indexer/env.yaml.example
Normal file
@@ -0,0 +1,43 @@
|
||||
# Configuration file reference
|
||||
config: config.yaml
|
||||
|
||||
# Processing settings
|
||||
njobs: 8 # Number of parallel processing jobs
|
||||
|
||||
# Azure AI Search configuration
|
||||
search_service_name: "https://your-search-service.search.windows.net" # Full endpoint URL of the search service (not just the bare service name)
|
||||
search_admin_key: "your-search-admin-key"
|
||||
|
||||
# Azure OpenAI Embedding service
|
||||
embedding_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-12-01-preview"
|
||||
embedding_model_key: "your-openai-key"
|
||||
VECTOR_DIMENSION: 1536 # Embedding vector size; 1536 is the default output dimension of text-embedding-3-small
|
||||
FLAG_AOAI: "V3" # Azure OpenAI version
|
||||
FLAG_EMBEDDING_MODEL: "AOAI" # Embedding model type: "AOAI" or "qwen3-embedding-8b"
|
||||
|
||||
# Document Intelligence configuration
|
||||
extract_method: "di+vision-llm" # Extraction method: "di+vision-llm", "vision-llm", or "di"
|
||||
form_rec_resource: "https://your-di-service.cognitiveservices.azure.com/"
|
||||
form_rec_key: "your-di-key"
|
||||
|
||||
# Document Intelligence features
|
||||
di-hiRes: true # High resolution OCR
|
||||
di-Formulas: true # Mathematical expression detection
|
||||
di_allow_features_ext: "pdf;jpeg;jpg;png;bmp;tiff;heif" # Semicolon-separated list of supported file extensions
|
||||
|
||||
# Vision and captioning models
|
||||
captioning_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
|
||||
captioning_model_key: "your-openai-key"
|
||||
vision_max_images: 200 # Maximum images to process per document (0 = no limit)
|
||||
vision_image_method: "openai" # Image processing method: "openai"
|
||||
|
||||
|
||||
# Blob storage container URLs (including SAS token) for figures and DI results
|
||||
FIGURE_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"
|
||||
DI_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"
|
||||
|
||||
# Database configuration
|
||||
DB_URI: "postgresql://user:password@host:port/database_name"
|
||||
|
||||
# Processing flags
|
||||
header_fix: false # Enable/disable header fixing
|
||||
Reference in New Issue
Block a user