init
This commit is contained in:
51
vw-document-ai-indexer/env.yaml
Normal file
51
vw-document-ai-indexer/env.yaml
Normal file
@@ -0,0 +1,51 @@
|
||||
config: config.yaml
|
||||
njobs: 1
|
||||
|
||||
|
||||
search_service_name: https://<resource name>.search.windows.net
|
||||
search_admin_key:
|
||||
|
||||
embedding_model_endpoint: https://<resource name>.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-12-01-preview
|
||||
embedding_model_key:
|
||||
VECTOR_DIMENSION: 1536
|
||||
|
||||
extract_method: di+vision-llm
|
||||
|
||||
# extract_method: vision-llm
|
||||
|
||||
form_rec_resource: https://<resource name>.cognitiveservices.azure.cn/
|
||||
form_rec_key:
|
||||
|
||||
# Perform OCR at a higher resolution to handle documents with fine print
|
||||
di-hiRes: true
|
||||
# Enable the detection of mathematical expressions in the document.
|
||||
di-Formulas: true
|
||||
# Semicolon-separated list of file extensions. Presumably the file types for
# which the DI add-on features above (di-hiRes, di-Formulas) are applied --
# TODO confirm against the consumer. "jpep" was a typo for "jpeg".
di_allow_features_ext: pdf;jpeg;jpg;png;bmp;tiff;heif
|
||||
|
||||
|
||||
|
||||
# Image understanding
|
||||
figure_caption:
|
||||
include_di_content: false
|
||||
description_gen_max_images: 0
|
||||
model_endpoint: null
|
||||
model_key: null
|
||||
model: null # leave empty for Azure
|
||||
azure_deployment: gpt-4o # Azure deployment name; leave empty for models on other platforms
|
||||
api_version: 2024-08-01-preview # Azure API version; leave empty for other platforms
|
||||
|
||||
|
||||
FLAG_AOAI: "V3"
|
||||
#FLAG_EMBEDDING_MODEL: "qwen3-embedding-8b"
|
||||
FLAG_EMBEDDING_MODEL: "AOAI"
|
||||
|
||||
|
||||
FIGURE_BLOB_ACCOUNT_URL: https://blob sas url
|
||||
|
||||
DI_BLOB_ACCOUNT_URL: https://blob sas url
|
||||
|
||||
DB_URI: postgresql+psycopg2://user:passwords@localhost:5433/document_indexer
|
||||
|
||||
header_fix: true
|
||||
|
||||
|
||||
Reference in New Issue
Block a user