---
# General settings.
# NOTE(review): `config` looks like the path of a companion config file and
# `njobs` like a parallel-worker count - confirm against the consuming code.
config: config.yaml
njobs: 1
# Azure AI Search connection.
# Despite the key name, the value is a full endpoint URL; replace
# `<resource name>` with the actual search resource name.
search_service_name: 'https://<resource name>.search.windows.net'
# Intentionally unset (null) in this template; supply the real admin key at
# deployment time rather than committing it.
search_admin_key: null
# Embedding model (Azure OpenAI deployment `text-embedding-3-small`).
# Replace `<resource name>` with the actual Azure OpenAI resource name.
embedding_model_endpoint: 'https://<resource name>.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-12-01-preview'
# Intentionally unset (null) in this template.
embedding_model_key: null
# NOTE(review): presumably the embedding vector length; it must match the
# output size of the model deployed at the endpoint above - confirm.
VECTOR_DIMENSION: 1536
# Content extraction method.
# NOTE(review): `di` presumably stands for Document Intelligence, combined
# here with a vision LLM - confirm the accepted values in the consuming code.
extract_method: di+vision-llm
# Alternative (disabled):
# extract_method: vision-llm
# Document Intelligence (Form Recognizer) resource.
# Replace `<resource name>` with the actual resource name.
# NOTE(review): this endpoint uses the `azure.cn` domain while the search and
# OpenAI placeholders in this file use global Azure domains - confirm intended.
form_rec_resource: 'https://<resource name>.cognitiveservices.azure.cn/'
# Intentionally unset (null) in this template.
form_rec_key: null

# Perform OCR at a higher resolution to handle documents with fine print.
di-hiRes: true
# Enable the detection of mathematical expressions in the document.
di-Formulas: true
# Semicolon-separated file extensions.
# NOTE(review): presumably the file types for which the add-on features above
# are requested - confirm. `jpeg` is listed alongside `jpg` so both spellings
# of the JPEG extension are matched.
di_allow_features_ext: pdf;jpeg;jpg;png;bmp;tiff;heif
# Image understanding (figure captioning).
# NOTE(review): the keys below are nested under `figure_caption` on the
# assumption that they are its options - confirm against the config loader.
figure_caption:
  include_di_content: false
  description_gen_max_images: 0
  model_endpoint: null
  model_key: null
  model: null  # leave empty for Azure
  azure_deployment: gpt-4o  # Azure deployment name; leave empty for models on other platforms
  api_version: '2024-08-01-preview'  # Azure API version; leave empty for other platforms
# Selector flags (string values).
# NOTE(review): meaning of "V3" is not evident from this file - confirm.
FLAG_AOAI: 'V3'
# Alternative embedding backend (disabled):
# FLAG_EMBEDDING_MODEL: "qwen3-embedding-8b"
FLAG_EMBEDDING_MODEL: 'AOAI'
# Blob storage locations. Both values are placeholders to be replaced with a
# blob SAS URL; they are quoted because the placeholder text contains spaces.
FIGURE_BLOB_ACCOUNT_URL: 'https://blob sas url'
DI_BLOB_ACCOUNT_URL: 'https://blob sas url'
# PostgreSQL connection URI (psycopg2 driver), database `document_indexer`
# on localhost port 5433. `user:passwords` is placeholder text; do not commit
# real credentials here.
DB_URI: 'postgresql+psycopg2://user:passwords@localhost:5433/document_indexer'
# NOTE(review): purpose is not evident from this file - presumably toggles a
# heading/header correction step in the pipeline; confirm.
header_fix: true