---
# Settings for the document-processing pipeline: Azure AI Search, the
# embedding service, Document Intelligence extraction, vision captioning,
# blob storage, and the database connection.
#
# NOTE(review): every endpoint, key, SAS URL, and DB credential below is a
# placeholder ("your-..."). Replace them locally and avoid committing real
# secrets to version control.
# NOTE(review): key names mix styles (snake_case, UPPER_CASE, "di-hiRes").
# They are kept exactly as written, presumably because the consuming code
# looks them up by these names — confirm before renaming any of them.

# Configuration file reference
config: config.yaml

# Processing settings
njobs: 8  # Number of parallel processing jobs

# Azure AI Search configuration
search_service_name: "https://your-search-service.search.windows.net"
search_admin_key: "your-search-admin-key"

# Azure OpenAI Embedding service
embedding_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-12-01-preview"
embedding_model_key: "your-openai-key"
VECTOR_DIMENSION: 1536
FLAG_AOAI: "V3"  # Azure OpenAI version
# Embedding model type: "AOAI" or "qwen3-embedding-8b"
FLAG_EMBEDDING_MODEL: "AOAI"

# Document Intelligence configuration
# Extraction method: "di+vision-llm", "vision-llm", "di"
extract_method: "di+vision-llm"
form_rec_resource: "https://your-di-service.cognitiveservices.azure.com/"
form_rec_key: "your-di-key"

# Document Intelligence features
di-hiRes: true  # High resolution OCR
di-Formulas: true  # Mathematical expression detection
# Supported file extensions (semicolon-separated)
di_allow_features_ext: "pdf;jpeg;jpg;png;bmp;tiff;heif"

# Vision and captioning models
captioning_model_endpoint: "https://your-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
captioning_model_key: "your-openai-key"
# Maximum images to process per document (0 = no limit)
vision_max_images: 200
vision_image_method: "openai"  # Image processing method: "openai"

# Blob storage for figures and DI results
FIGURE_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"
DI_BLOB_ACCOUNT_URL: "https://your-storage.blob.core.windows.net/container?sas-token"

# Database configuration
DB_URI: "postgresql://user:password@host:port/database_name"

# Processing flags
header_fix: false  # Enable/disable header fixing