This commit is contained in:
2025-09-26 17:15:54 +08:00
commit db0e5965ec
211 changed files with 40437 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
# Data-source definition for the document-ai-indexer: one list entry describing
# where documents are read from and which search index they are written to.
# NOTE(review): data_path embeds a container-level SAS token (sp=racwdl, valid
# until 2035-08-27) in plain text. Rotate it and inject it from a secret store
# instead of committing it.
- data_path: "https://sasales2caiprd.blob.core.chinacloudapi.cn/doc-landing-cat-usermanual-prd?sp=racwdl&st=2025-08-27T06:26:11Z&se=2035-08-27T14:41:11Z&spr=https&sv=2024-11-04&sr=c&sig=7GVqfbWPM5VDRW8crTeR06KsSPX%2BuuDLjN7ceqBuLCE%3D"
datasource_name: "cat-usermanual-prd"
data_dir: ""
# Same path the Kubernetes manifests mount the Azure File share on.
base_path: "/app/run_tmp"
# NOTE(review): presumably 0 means "no limit on the number of files" — confirm
# against the indexer code.
process_file_num: 0
# NOTE(review): presumably only files modified after this timestamp are
# processed; time zone is not stated — confirm.
process_file_last_modify: "2025-06-24 00:00:00"
# Chunking parameters. NOTE(review): unit is presumably tokens — confirm.
chunk_size: 2048
token_overlap: 128
# Target search index definition(s).
index_schemas:
- index_name: "index-cat-usermanual-chunk-prd"
data_type: ["chunk"]
field_type: "append"
upload_batch_size: 50
fields: ["filepath", "title"]
full_metadata_vector_fields: ["full_headers", "doc_metadata"]
semantic_config_name: "default"
vector_config_name: "vectorSearchProfile"
update_by_field: "filepath"
# Each entry names a vector field and the source fields it is built from.
# NOTE(review): presumably the append_fields are concatenated before embedding —
# verify against the indexer.
vector_fields:
- field: "contentVector"
append_fields: ["content"]
- field: "full_metadata_vector"
append_fields: ["full_headers", "doc_metadata"]
# NOTE(review): presumably merges the listed fields into the field named by
# "key" — verify against the indexer.
merge_fields:
- key: "doc_metadata"
fields: ["title"]
# NOTE(review): full_metadata_vector_fields is also set inside the index schema
# with the same value. Confirm both levels are actually read, otherwise drop
# one of them.
full_metadata_vector_fields: ["full_headers", "doc_metadata"]

View File

@@ -0,0 +1,50 @@
# Deployment runbook for the user-manual document-ai-indexer (production).
# Run the commands from the repository root. Paths use forward slashes so the
# same lines work in PowerShell, cmd and POSIX shells.

# Build the image and push it to the production container registry.
# The password is intentionally not passed with -p: docker prompts for it, which
# keeps it out of shell history and the process list.
docker login acrsales2caiprd.azurecr.cn -u username
docker build . -t document-ai-indexer:2.0.4
docker tag document-ai-indexer:2.0.4 acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4
docker push acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4

# Log in to AKS (one-time setup; uncomment what is needed).
# NOTE(review): the registry above lives in Azure China (*.azurecr.cn) while the
# first command selects "AzureCloud" — confirm the cloud name, subscription and
# resource group before using these lines.
# az cloud set --name AzureCloud # Switch CLI to Azure cloud
# az login # Log in to Azure China account (browser or device code flow)
# az account set -s 079d8bd8-b4cc-4892-9307-aa6dedf890e9 #! set subs
# az aks get-credentials -g rg-aiflow-lab -n aks-sales2c-ai-prd --overwrite-existing --file ~/.kube/config
kubectl config use-context aks-sales2c-ai-prd
kubectl config current-context

# Create the Secret used by the Azure Files volume (one-time setup).
# kubectl create secret generic azure-files-cred \
# --from-literal=azurestorageaccountname=saaisearchlab \
# --from-literal=azurestorageaccountkey=xxxxxxxxxxxxxxxxxxxx \
# -n knowledge-agent

# Deploy ConfigMap: recreate it from the current env/config/prompt files.
# --ignore-not-found keeps the first deployment from failing when the ConfigMap
# does not exist yet.
kubectl delete configmap document-ai-indexer-usermanual-config -n knowledge-agent --ignore-not-found
kubectl create configmap document-ai-indexer-usermanual-config -n knowledge-agent --from-file=./deploy/prd-usermanual/env.yaml --from-file=./deploy/prd-usermanual/config.yaml --from-file=prompt.yaml

# Deploy Pod (one-off run)
# kubectl create namespace knowledge-agent
# kubectl delete pod document-ai-indexer-usermanual -n knowledge-agent
kubectl apply -f ./deploy/prd-usermanual/document-ai-indexer-usermanual.yml -n knowledge-agent

# Monitor Pod (follows the log until interrupted)
kubectl logs -f document-ai-indexer-usermanual -n knowledge-agent

# Deploy CronJob
kubectl apply -f ./deploy/prd-usermanual/document-ai-indexer-cronjob.yml -n knowledge-agent

# Check CronJob Status
kubectl get cronjobs -n knowledge-agent

# Check Job Execution History
kubectl get jobs -n knowledge-agent

###########
# Manually trigger a job (for testing)
# Remove a previous test job first; --ignore-not-found makes this a no-op when
# there is none.
kubectl delete job manual-test -n knowledge-agent --ignore-not-found
kubectl create job --from=cronjob/document-ai-indexer-cronjob manual-test -n knowledge-agent

# Check Job Logs
kubectl logs -f job/manual-test -n knowledge-agent

View File

@@ -0,0 +1,64 @@
# CronJob that runs the document-ai-indexer for the user-manual data source on a
# fixed schedule.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: document-ai-indexer-cronjob
spec:
  # Run every 10 minutes.
  schedule: "*/10 * * * *"
  # Do not start a new run while the previous Job is still running; the
  # overlapping run is skipped.
  concurrencyPolicy: Forbid
  # Keep the 10 most recent successful Job records.
  successfulJobsHistoryLimit: 10
  # Keep the 10 most recent failed Job records.
  failedJobsHistoryLimit: 10
  jobTemplate:
    spec:
      # No retries: a failed Pod marks the Job as failed.
      backoffLimit: 0
      template:
        metadata:
          labels:
            app: document-ai-indexer
            job-type: cronjob
        spec:
          restartPolicy: Never
          volumes:
            # 1. ConfigMap volume. Uses the ConfigMap created by the
            #    prd-usermanual deployment runbook, which carries env.yaml,
            #    config.yaml and prompt.yaml (the same one the one-off Pod uses).
            - name: config-volume
              configMap:
                name: document-ai-indexer-usermanual-config
                items:
                  - key: env.yaml
                    path: env.yaml
                  - key: config.yaml
                    path: config.yaml
                  - key: prompt.yaml
                    path: prompt.yaml
            # 2. Azure File share volume
            - name: data-volume
              azureFile:
                # Secret holding the storage-account credentials.
                secretName: azure-files-cred
                # Name of the Azure file share.
                shareName: fs-document-ai-indexer
                # The indexer writes to this share.
                readOnly: false
          containers:
            - name: document-ai-indexer
              # Same tag that the deployment runbook builds and pushes.
              image: acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4
              imagePullPolicy: Always
              volumeMounts:
                # ConfigMap files, mounted one by one via subPath so that the
                # rest of /app stays visible.
                - name: config-volume
                  mountPath: /app/env.yaml
                  subPath: env.yaml
                - name: config-volume
                  mountPath: /app/config.yaml
                  subPath: config.yaml
                - name: config-volume
                  mountPath: /app/prompt.yaml
                  subPath: prompt.yaml
                # Azure File share: working directory the program reads and
                # writes.
                - name: data-volume
                  mountPath: /app/run_tmp

View File

@@ -0,0 +1,47 @@
# One-off Pod that runs the document-ai-indexer for the user-manual data source.
apiVersion: v1
kind: Pod
metadata:
  name: document-ai-indexer-usermanual
spec:
  # Run once; the container is not restarted after it exits.
  restartPolicy: Never
  containers:
    - name: document-ai-indexer-usermanual
      image: acrsales2caiprd.azurecr.cn/document-ai-indexer:2.0.4
      imagePullPolicy: Always
      volumeMounts:
        # Configuration files, each mounted individually through subPath.
        - name: config-volume
          mountPath: /app/env.yaml
          subPath: env.yaml
        - name: config-volume
          mountPath: /app/config.yaml
          subPath: config.yaml
        - name: config-volume
          mountPath: /app/prompt.yaml
          subPath: prompt.yaml
        # Working directory the program reads from and writes to.
        - name: data-volume
          mountPath: /app/run_tmp
  volumes:
    # Configuration files supplied by the ConfigMap.
    - name: config-volume
      configMap:
        name: document-ai-indexer-usermanual-config
        items:
          - key: env.yaml
            path: env.yaml
          - key: config.yaml
            path: config.yaml
          - key: prompt.yaml
            path: prompt.yaml
    # Azure File share used as the working directory.
    - name: data-volume
      azureFile:
        # Secret holding the storage-account credentials.
        secretName: azure-files-cred
        # Name of the Azure file share.
        shareName: fs-document-ai-indexer
        readOnly: false

View File

@@ -0,0 +1,10 @@
# Deploy the embedding API proxy (Service + Ingress) to the production cluster.

# Log in to AKS (one-time setup; uncomment what is needed).
# az cloud set --name AzureChinaCloud
# az login
# az account set --subscription 36646bff-fbd2-4767-b27b-2fe786b5b15c
# az aks get-credentials --resource-group rg-sales2c-ai-service --name aks-sales2c-ai-prd --overwrite-existing --file ~/.kube/config

# Select the production cluster and print the active context as a sanity check.
kubectl config use-context aks-sales2c-ai-prd
kubectl config current-context

# Create the namespace on first deployment only.
# kubectl create namespace knowledge-agent
kubectl apply --filename embedding-api-proxy_k8s.yml --namespace knowledge-agent

View File

@@ -0,0 +1,39 @@
# Service: exposes the external inference host inside the cluster as a Service
# (ExternalName, i.e. a DNS alias).
apiVersion: v1
kind: Service
metadata:
  name: itpai-backend
spec:
  type: ExternalName
  externalName: itpai.infer.api.vgcserv.com.cn
  ports:
    - protocol: TCP
      port: 443
      targetPort: 443
---
# Ingress: proxies the /v1-openai path prefix to the Service above.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: itpai-proxy
  annotations:
    kubernetes.io/ingress.class: "nginx"
    # Talk HTTPS to the upstream, send SNI and the upstream's own host name.
    nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
    nginx.ingress.kubernetes.io/proxy-ssl-server-name: "on"
    nginx.ingress.kubernetes.io/upstream-vhost: "itpai.infer.api.vgcserv.com.cn"
    # The upstream certificate is not verified.
    nginx.ingress.kubernetes.io/proxy-ssl-verify: "off"
    # Proxy timeouts, in seconds.
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "5"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "30"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
spec:
  rules:
    - host: sales2c-ai.chinanorth3.cloudapp.chinacloudapi.cn
      http:
        paths:
          - path: /v1-openai
            pathType: Prefix
            backend:
              service:
                name: itpai-backend
                port:
                  number: 443

View File

@@ -0,0 +1,42 @@
# Runtime settings for the document-ai-indexer (mounted into the container as
# env.yaml).
# NOTE(review): this file carries live credentials in plain text (search admin
# key, embedding key, Document Intelligence key, two blob SAS URLs and the
# PostgreSQL password). Rotate them and supply them through a Kubernetes Secret
# or another secret store instead of committing them.
config: config.yaml
njobs: 12

# Azure AI Search
search_service_name: https://search-sales2c-ai-prd.search.azure.cn
search_admin_key: ev6B0OtF66WkDmQKJBa4n1Haa8e8p8N3zdaEBnbWtoAzSeAMWSid

# Embedding model
# NOTE(review): the endpoint is plain http, so the API key below travels
# unencrypted — switch to https if the ingress serves TLS.
embedding_model_endpoint: http://sales2c-ai.chinanorth3.cloudapp.chinacloudapi.cn/v1-openai/embeddings
embedding_model_key: gpustack_0e3d5b35adaf239b_99adacd6f540c7d81006365c8030b16c
VECTOR_DIMENSION: 4096
FLAG_AOAI: "V3"
FLAG_EMBEDDING_MODEL: qwen3-embedding-8b

# Document extraction (Azure Document Intelligence)
extract_method: di+vision-llm
form_rec_resource: https://di-sales2c-ai-prd.cognitiveservices.azure.cn/
form_rec_key: G0vhH3twd5K3YYCgfnttf5V6XTMMU4PMdVvRHsgaTb8kZDoU8ZHjJQQJ99BDAEHpCsCfT1gyAAALACOGmOcn
di-Formulas: false
di-hiRes: true
# Semicolon-separated file extensions ("jpeg" was previously misspelled "jpep",
# so .jpeg files did not match this list).
di_allow_features_ext: pdf;jpeg;jpg;png;bmp;tiff;heif

# Blob containers, addressed through container-level SAS URLs.
FIGURE_BLOB_ACCOUNT_URL: https://sasales2caiprd.blob.core.chinacloudapi.cn/extracted-image-cat-prd?sp=racwdl&st=2025-08-04T06:34:42Z&se=2035-08-04T14:49:42Z&spr=https&sv=2024-11-04&sr=c&sig=t0DTjfht%2FNaPlXUtxhKr40NzZY5kWovgNxJUeAepvgA%3D
DI_BLOB_ACCOUNT_URL: https://sasales2caiprd.blob.core.chinacloudapi.cn/di-result-cat-prd?sp=racwdl&st=2025-08-04T06:34:11Z&se=2035-08-04T14:49:11Z&spr=https&sv=2024-11-04&sr=c&sig=26wxy5M9lcIO2o9zzr6jOtdw2gQTZnGmampHx5EyXbo%3D

# PostgreSQL connection string.
DB_URI: postgresql://pgadmin:vwb54pSQDp8vYkusKms@pg-sales2c-ai-prd.postgres.database.chinacloudapi.cn/document-ai-indexer
# Image understanding: settings for generating figure captions.
figure_caption:
include_di_content: false # NOTE(review): presumably whether the figure content includes the Document Intelligence result — confirm
description_gen_max_images: 0 # Maximum number of images to describe; 0 means no descriptions are generated
model_endpoint: null
model_key: null
model: null # Set to null when using Azure OpenAI
azure_deployment: gpt-4o # Azure OpenAI deployment name; leave empty on other platforms
api_version: 2024-08-01-preview # Azure OpenAI API version; leave empty on other platforms
# NOTE(review): presumably enables correction of extracted headings — confirm
# against the indexer.
header_fix: true