init
This commit is contained in:
27
vw-document-ai-indexer/deploy/prd-usermanual/config.yaml
Normal file
27
vw-document-ai-indexer/deploy/prd-usermanual/config.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
- data_path: "https://sasales2caiprd.blob.core.chinacloudapi.cn/doc-landing-cat-usermanual-prd?sp=racwdl&st=2025-08-27T06:26:11Z&se=2035-08-27T14:41:11Z&spr=https&sv=2024-11-04&sr=c&sig=7GVqfbWPM5VDRW8crTeR06KsSPX%2BuuDLjN7ceqBuLCE%3D"
|
||||
datasource_name: "cat-usermanual-prd"
|
||||
data_dir: ""
|
||||
base_path: "/app/run_tmp"
|
||||
process_file_num: 0
|
||||
process_file_last_modify: "2025-06-24 00:00:00"
|
||||
chunk_size: 2048
|
||||
token_overlap: 128
|
||||
index_schemas:
|
||||
- index_name: "index-cat-usermanual-chunk-prd"
|
||||
data_type: ["chunk"]
|
||||
field_type: "append"
|
||||
upload_batch_size: 50
|
||||
fields: ["filepath", "title"]
|
||||
full_metadata_vector_fields: ["full_headers", "doc_metadata"]
|
||||
semantic_config_name: "default"
|
||||
vector_config_name: "vectorSearchProfile"
|
||||
update_by_field: "filepath"
|
||||
vector_fields:
|
||||
- field: "contentVector"
|
||||
append_fields: ["content"]
|
||||
- field: "full_metadata_vector"
|
||||
append_fields: ["full_headers", "doc_metadata"]
|
||||
merge_fields:
|
||||
- key: "doc_metadata"
|
||||
fields: ["title"]
|
||||
full_metadata_vector_fields: ["full_headers", "doc_metadata"]
|
||||
50
vw-document-ai-indexer/deploy/prd-usermanual/deploy.sh
Normal file
50
vw-document-ai-indexer/deploy/prd-usermanual/deploy.sh
Normal file
@@ -0,0 +1,50 @@
|
||||
|
||||
# docker build
|
||||
docker login acrsales2caiprd.azurecr.cn -u username -p password
|
||||
docker build . -t document-ai-indexer:2.0.4
|
||||
docker tag document-ai-indexer:2.0.4 acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4
|
||||
docker push acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4
|
||||
|
||||
|
||||
# login AKS
|
||||
# az cloud set --name AzureChinaCloud # Switch CLI to the Azure China cloud
|
||||
# az login # Log in to Azure China account (browser or device code flow)
|
||||
# az account set -s 079d8bd8-b4cc-4892-9307-aa6dedf890e9 #! set subs
|
||||
# az aks get-credentials -g rg-aiflow-lab -n aks-sales2c-ai-prd --overwrite-existing --file ~/.kube/config
|
||||
kubectl config use-context aks-sales2c-ai-prd
|
||||
kubectl config current-context
|
||||
|
||||
# Create Azure Files Volume
|
||||
# kubectl create secret generic azure-files-cred \
|
||||
# --from-literal=azurestorageaccountname=saaisearchlab \
|
||||
# --from-literal=azurestorageaccountkey=xxxxxxxxxxxxxxxxxxxx \
|
||||
# -n knowledge-agent
|
||||
|
||||
# kubectl delete configmap document-ai-indexer-usermanual-config -n knowledge-agent
|
||||
|
||||
# Deploy ConfigMap
|
||||
kubectl delete configmap document-ai-indexer-usermanual-config -n knowledge-agent
|
||||
# Forward-slash paths: bash treats "\" as an escape character, so ".\deploy\..." would not resolve.
kubectl create configmap document-ai-indexer-usermanual-config -n knowledge-agent --from-file=./deploy/prd-usermanual/env.yaml --from-file=./deploy/prd-usermanual/config.yaml --from-file=prompt.yaml
|
||||
|
||||
# Deploy Pod
|
||||
# kubectl create namespace knowledge-agent
|
||||
# kubectl delete pod document-ai-indexer-usermanual -n knowledge-agent
|
||||
# Forward-slash path so the manifest resolves under bash (matches the CronJob apply command in this script).
kubectl apply -f ./deploy/prd-usermanual/document-ai-indexer-usermanual.yml -n knowledge-agent
|
||||
|
||||
# Monitor Pod
|
||||
kubectl logs -f document-ai-indexer-usermanual -n knowledge-agent
|
||||
|
||||
# Deploy CronJob
|
||||
kubectl apply -f deploy/prd-usermanual/document-ai-indexer-cronjob.yml --namespace knowledge-agent
|
||||
|
||||
# Check CronJob Status
|
||||
kubectl get cronjobs -n knowledge-agent
|
||||
# Check Job Execution History
|
||||
kubectl get jobs -n knowledge-agent
|
||||
|
||||
###########
|
||||
# Manually trigger a job (for testing)
|
||||
kubectl delete job manual-test -n knowledge-agent
|
||||
kubectl create job --from=cronjob/document-ai-indexer-cronjob manual-test -n knowledge-agent
|
||||
# Check Job Logs
|
||||
kubectl logs -f job/manual-test -n knowledge-agent
|
||||
@@ -0,0 +1,64 @@
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: document-ai-indexer-cronjob
|
||||
spec:
|
||||
# Scheduling configuration - execute every 10 minutes
|
||||
schedule: "*/10 * * * *"
|
||||
|
||||
# Concurrency policy: Disable concurrent execution. If the previous job is still running, new execution will be skipped.
|
||||
concurrencyPolicy: Forbid
|
||||
|
||||
# Successful jobs history limit: Keep the last 10 successful job records.
|
||||
successfulJobsHistoryLimit: 10
|
||||
|
||||
# Failed jobs history limit: Keep the last 10 failed job records.
|
||||
failedJobsHistoryLimit: 10
|
||||
|
||||
# Job template
|
||||
jobTemplate:
|
||||
spec:
|
||||
backoffLimit: 0
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: document-ai-indexer
|
||||
job-type: cronjob
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
|
||||
volumes:
|
||||
# 1. ConfigMap volume
|
||||
- name: config-volume
|
||||
configMap:
|
||||
# Must match the ConfigMap created by deploy.sh for this deployment.
name: document-ai-indexer-usermanual-config
|
||||
items:
|
||||
- key: env.yaml
|
||||
path: env.yaml
|
||||
- key: config.yaml
|
||||
path: config.yaml
|
||||
|
||||
# 2. Azure File Share volume
|
||||
- name: data-volume
|
||||
azureFile:
|
||||
secretName: azure-files-cred # References the Secret you created
|
||||
shareName: fs-document-ai-indexer # Your file share name
|
||||
readOnly: false # Write permission
|
||||
|
||||
containers:
|
||||
- name: document-ai-indexer
|
||||
# Keep this tag in sync with the image built and pushed by deploy.sh.
image: acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4
|
||||
imagePullPolicy: Always
|
||||
# Mount the volume into the container
|
||||
volumeMounts:
|
||||
# ConfigMap Mount
|
||||
- name: config-volume
|
||||
mountPath: /app/env.yaml
|
||||
subPath: env.yaml
|
||||
- name: config-volume
|
||||
mountPath: /app/config.yaml
|
||||
subPath: config.yaml
|
||||
|
||||
# Azure File Shared mount
|
||||
- name: data-volume
|
||||
mountPath: /app/run_tmp # Program write/read directory
|
||||
@@ -0,0 +1,47 @@
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: document-ai-indexer-usermanual
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
|
||||
volumes:
|
||||
# 1. ConfigMap volume
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: document-ai-indexer-usermanual-config
|
||||
items:
|
||||
- key: env.yaml
|
||||
path: env.yaml
|
||||
- key: config.yaml
|
||||
path: config.yaml
|
||||
- key: prompt.yaml
|
||||
path: prompt.yaml
|
||||
|
||||
# 2. Azure File Share volume
|
||||
- name: data-volume
|
||||
azureFile:
|
||||
secretName: azure-files-cred # References the Secret you created
|
||||
shareName: fs-document-ai-indexer # Your file share name
|
||||
readOnly: false
|
||||
|
||||
containers:
|
||||
- name: document-ai-indexer-usermanual
|
||||
image: acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4
|
||||
imagePullPolicy: Always
|
||||
# Mount the volume into the container
|
||||
volumeMounts:
|
||||
# ConfigMap Mount
|
||||
- name: config-volume
|
||||
mountPath: /app/env.yaml
|
||||
subPath: env.yaml
|
||||
- name: config-volume
|
||||
mountPath: /app/config.yaml
|
||||
subPath: config.yaml
|
||||
- name: config-volume
|
||||
mountPath: /app/prompt.yaml
|
||||
subPath: prompt.yaml
|
||||
|
||||
# Azure File Share Mount
|
||||
- name: data-volume
|
||||
mountPath: /app/run_tmp # Directory for program read/write
|
||||
@@ -0,0 +1,10 @@
|
||||
# login AKS
|
||||
# az cloud set -n AzureChinaCloud
|
||||
# az login
|
||||
# az account set -s 36646bff-fbd2-4767-b27b-2fe786b5b15c
|
||||
# az aks get-credentials -g rg-sales2c-ai-service -n aks-sales2c-ai-prd --overwrite-existing --file ~/.kube/config
|
||||
kubectl config use-context aks-sales2c-ai-prd
|
||||
kubectl config current-context
|
||||
|
||||
# kubectl create namespace knowledge-agent
|
||||
kubectl apply -f embedding-api-proxy_k8s.yml -n knowledge-agent
|
||||
@@ -0,0 +1,39 @@
|
||||
# Service resource: maps an external domain name to an in-cluster Service
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: itpai-backend
|
||||
spec:
|
||||
type: ExternalName
|
||||
externalName: itpai.infer.api.vgcserv.com.cn
|
||||
ports:
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
targetPort: 443
|
||||
---
|
||||
# Ingress resource: proxies the /v1-openai path to the Service above
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: itpai-proxy
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
|
||||
nginx.ingress.kubernetes.io/proxy-ssl-server-name: "on"
|
||||
nginx.ingress.kubernetes.io/proxy-ssl-verify: "off"
|
||||
nginx.ingress.kubernetes.io/upstream-vhost: "itpai.infer.api.vgcserv.com.cn"
|
||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
|
||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "30"
|
||||
nginx.ingress.kubernetes.io/proxy-connect-timeout: "5"
|
||||
spec:
|
||||
rules:
|
||||
- host: sales2c-ai.chinanorth3.cloudapp.chinacloudapi.cn
|
||||
http:
|
||||
paths:
|
||||
- path: /v1-openai
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: itpai-backend
|
||||
port:
|
||||
number: 443
|
||||
42
vw-document-ai-indexer/deploy/prd-usermanual/env.yaml
Normal file
42
vw-document-ai-indexer/deploy/prd-usermanual/env.yaml
Normal file
@@ -0,0 +1,42 @@
|
||||
# NOTE(review): this file carries service keys, SAS tokens and a database password in plain text,
# and deploy.sh loads it into a ConfigMap. Consider rotating the credentials and supplying them
# through a Kubernetes Secret instead.
config: config.yaml
|
||||
njobs: 12
|
||||
|
||||
search_service_name: https://search-sales2c-ai-prd.search.azure.cn
|
||||
search_admin_key: ev6B0OtF66WkDmQKJBa4n1Haa8e8p8N3zdaEBnbWtoAzSeAMWSid
|
||||
|
||||
|
||||
embedding_model_endpoint: http://sales2c-ai.chinanorth3.cloudapp.chinacloudapi.cn/v1-openai/embeddings
|
||||
embedding_model_key: gpustack_0e3d5b35adaf239b_99adacd6f540c7d81006365c8030b16c
|
||||
VECTOR_DIMENSION: 4096
|
||||
FLAG_AOAI: "V3"
|
||||
FLAG_EMBEDDING_MODEL: qwen3-embedding-8b
|
||||
|
||||
|
||||
extract_method: di+vision-llm
|
||||
form_rec_resource: https://di-sales2c-ai-prd.cognitiveservices.azure.cn/
|
||||
form_rec_key: G0vhH3twd5K3YYCgfnttf5V6XTMMU4PMdVvRHsgaTb8kZDoU8ZHjJQQJ99BDAEHpCsCfT1gyAAALACOGmOcn
|
||||
di-Formulas: false
|
||||
di-hiRes: true
|
||||
# Semicolon-separated file extensions; presumably gates which inputs get the DI features above — confirm against the consumer.
di_allow_features_ext: pdf;jpeg;jpg;png;bmp;tiff;heif
|
||||
|
||||
|
||||
FIGURE_BLOB_ACCOUNT_URL: https://sasales2caiprd.blob.core.chinacloudapi.cn/extracted-image-cat-prd?sp=racwdl&st=2025-08-04T06:34:42Z&se=2035-08-04T14:49:42Z&spr=https&sv=2024-11-04&sr=c&sig=t0DTjfht%2FNaPlXUtxhKr40NzZY5kWovgNxJUeAepvgA%3D
|
||||
|
||||
|
||||
DI_BLOB_ACCOUNT_URL: https://sasales2caiprd.blob.core.chinacloudapi.cn/di-result-cat-prd?sp=racwdl&st=2025-08-04T06:34:11Z&se=2035-08-04T14:49:11Z&spr=https&sv=2024-11-04&sr=c&sig=26wxy5M9lcIO2o9zzr6jOtdw2gQTZnGmampHx5EyXbo%3D
|
||||
|
||||
|
||||
DB_URI: postgresql://pgadmin:vwb54pSQDp8vYkusKms@pg-sales2c-ai-prd.postgres.database.chinacloudapi.cn/document-ai-indexer
|
||||
|
||||
# Image understanding
|
||||
figure_caption:
|
||||
include_di_content: false # Figure content that quotes the result of di
|
||||
description_gen_max_images: 0 # The maximum number of images to be described. 0 means no description
|
||||
model_endpoint: null
|
||||
model_key: null
|
||||
model: null # azure openai set null
|
||||
azure_deployment: gpt-4o # Azure OpenAI deployment name; set to empty on other platforms
|
||||
api_version: 2024-08-01-preview # Azure OpenAI API version (presumably unused on other platforms — confirm)
|
||||
|
||||
|
||||
header_fix: true
|
||||
Reference in New Issue
Block a user