# 104 lines, 9.7 KiB, YAML
|
|
# SECURITY NOTE(review): the query string of this URL embeds a `sig=` signature
# (it looks like an Azure Storage SAS token) with expiry `se=2125-08-02`.
# Consider loading it from an environment variable or secret store instead of
# committing it, and rotating the token -- confirm with the owning team.
- data_path: "https://sasales2caiprd.blob.core.chinacloudapi.cn/doc-landing-cat-prd?sp=rl&st=2025-08-02T08:25:56Z&se=2125-08-02T16:40:56Z&spr=https&sv=2024-11-04&sr=c&sig=lJui2%2BOs8V%2BdzCkjchQCR7ITWT28tJ0HAq8bIhkkM%2Bk%3D"
|
||
|
|
datasource_name: "cat-standard-regulation-prd"
|
||
|
|
data_dir: ""
|
||
|
|
base_path: "/app/run_tmp"
|
||
|
|
process_file_num: 0
|
||
|
|
process_file_last_modify: "2025-06-24 00:00:00"
|
||
|
|
chunk_size: 2048
|
||
|
|
token_overlap: 256
|
||
|
|
index_schemas:
|
||
|
|
- index_name: "index-catonline-chunk-v2-prd"
|
||
|
|
data_type: ["metadata", "document", "chunk"]
|
||
|
|
# field_type: "append"
|
||
|
|
upload_batch_size: 50
|
||
|
|
fields: ["doc_metadata", "full_metadata_vector", "filepath", "timestamp", "title", "publisher", "publish_date", "document_category", "document_code", "language_code", "x_Standard_Regulation_Id", "x_Attachment_Type", "x_Standard_Title_CN", "x_Standard_Title_EN", "x_Standard_Published_State", "x_Standard_Drafting_Status", "x_Standard_Range", "x_Standard_Kind", "x_Standard_No", "x_Standard_Code", "x_Standard_Technical_Committee", "x_Standard_Vehicle_Type", "x_Standard_Power_Type", "x_Standard_CCS", "x_Standard_ICS", "x_Standard_Published_Date", "x_Standard_Effective_Date", "x_Regulation_Status", "x_Regulation_Title_CN", "x_Regulation_Title_EN", "x_Regulation_Document_No", "x_Regulation_Issued_Date", "x_Classification", "x_Work_Group", "x_Reference_Standard", "x_Replaced_by", "x_Refer_To", "func_uuid", "update_time", "status", "x_Standard_Published_State_EN", "x_Standard_Drafting_Status_EN", "x_Regulation_Status_EN","x_Replaced_Standard"]
|
||
|
|
full_metadata_vector_fields: ["full_headers", "doc_metadata"] # TODO: confirm this list -- "full_headers" does not appear in this index's `fields` list
|
||
|
|
semantic_config_name: "default"
|
||
|
|
vector_config_name: "vectorSearchProfile"
|
||
|
|
update_by_field: "filepath"
|
||
|
|
vector_fields:
|
||
|
|
- field: "contentVector"
|
||
|
|
append_fields: ["content"]
|
||
|
|
- field: "full_metadata_vector"
|
||
|
|
append_fields: ["full_headers", "doc_metadata"]
|
||
|
|
- index_name: "index-catonline-document-v2-prd"
|
||
|
|
data_type: ["document", "metadata"]
|
||
|
|
# field_type: "full"
|
||
|
|
key_fields: ["filepath"]
|
||
|
|
upload_batch_size: 1
|
||
|
|
fields: ["doc_metadata", "full_metadata_vector", "url", "metadata", "image_mapping", "document_schema", "main_title", "filepath", "timestamp", "title", "publisher", "publish_date", "document_category", "document_code", "language_code", "x_Standard_Regulation_Id", "x_Attachment_Type", "x_Standard_Title_CN", "x_Standard_Title_EN", "x_Standard_Published_State", "x_Standard_Drafting_Status", "x_Standard_Range", "x_Standard_Kind", "x_Standard_No", "x_Standard_Code", "x_Standard_Technical_Committee", "x_Standard_Vehicle_Type", "x_Standard_Power_Type", "x_Standard_CCS", "x_Standard_ICS", "x_Standard_Published_Date", "x_Standard_Effective_Date", "x_Regulation_Status", "x_Regulation_Title_CN", "x_Regulation_Title_EN", "x_Regulation_Document_No", "x_Regulation_Issued_Date", "x_Classification", "x_Work_Group", "x_Reference_Standard", "x_Replaced_by", "x_Refer_To", "func_uuid", "update_time", "status", "x_Standard_Published_State_EN", "x_Standard_Drafting_Status_EN", "x_Regulation_Status_EN","x_Replaced_Standard"]
|
||
|
|
merge_content_fields: ["content"]
|
||
|
|
full_metadata_vector_fields: ["doc_metadata"]
|
||
|
|
semantic_config_name: "default"
|
||
|
|
vector_config_name: "vectorSearchProfile"
|
||
|
|
update_by_field: "filepath"
|
||
|
|
vector_fields:
|
||
|
|
- field: "full_metadata_vector"
|
||
|
|
append_fields: ["doc_metadata"]
|
||
|
|
- index_name: "index-catonline-standard-regulation-v2-prd"
|
||
|
|
data_type: ["metadata"]
|
||
|
|
# field_type: "full"
|
||
|
|
key_fields: ["x_Standard_Regulation_Id"]
|
||
|
|
upload_batch_size: 1
|
||
|
|
fields: ["doc_metadata", "full_metadata_vector", "filepath", "timestamp", "title", "publisher", "publish_date", "document_category", "document_code", "language_code", "x_Standard_Regulation_Id", "x_Attachment_Type", "x_Standard_Title_CN", "x_Standard_Title_EN", "x_Standard_Published_State", "x_Standard_Drafting_Status", "x_Standard_Range", "x_Standard_Kind", "x_Standard_No", "x_Standard_Code", "x_Standard_Technical_Committee", "x_Standard_Vehicle_Type", "x_Standard_Power_Type", "x_Standard_CCS", "x_Standard_ICS", "x_Standard_Published_Date", "x_Standard_Effective_Date", "x_Regulation_Status", "x_Regulation_Title_CN", "x_Regulation_Title_EN", "x_Regulation_Document_No", "x_Regulation_Issued_Date", "x_Classification", "x_Work_Group", "x_Reference_Standard", "x_Replaced_by", "x_Refer_To", "func_uuid", "update_time", "status", "x_Standard_Published_State_EN", "x_Standard_Drafting_Status_EN", "x_Regulation_Status_EN","x_Replaced_Standard"]
|
||
|
|
vector_config_name: "vectorSearchProfile"
|
||
|
|
full_metadata_vector_fields: ["doc_metadata"]
|
||
|
|
semantic_config_name: "default"
|
||
|
|
update_by_field: "x_Standard_Regulation_Id"
|
||
|
|
vector_fields:
|
||
|
|
- field: "full_metadata_vector"
|
||
|
|
append_fields: ["doc_metadata"]
|
||
|
|
merge_fields:
|
||
|
|
- key: "doc_metadata"
|
||
|
|
fields: ["title", "publisher", "document_category", "document_code", "x_Attachment_Type", "x_Standard_Title_CN", "x_Standard_Title_EN", "x_Standard_Kind", "x_Standard_Technical_Committee", "x_Standard_Vehicle_Type", "x_Standard_Power_Type", "x_Standard_CCS", "x_Standard_ICS", "x_Regulation_Status", "x_Regulation_Title_CN", "x_Regulation_Title_EN", "x_Classification", "x_Work_Group", "status", "x_Standard_Published_State_EN", "x_Standard_Drafting_Status_EN", "x_Regulation_Status_EN","x_Replaced_Standard"]
|
||
|
|
# SECURITY NOTE(review): the query string of this URL embeds a `sig=` signature
# (it looks like an Azure Storage SAS token) with expiry `se=2099-09-08`, and it
# carries no `spr=` parameter. Consider loading it from an environment variable
# or secret store instead of committing it -- confirm with the owning team.
- data_path: "https://sasales2caiprd.blob.core.chinacloudapi.cn/doc-landing-cat-abroad-prd?sp=rl&st=2025-09-08T05:32:13Z&se=2099-09-08T13:47:13Z&sv=2024-11-04&sr=c&sig=ebYoiKrSwCk12cRnQqov197LvuBv7m%2FxNoQv4VDMY5o%3D"
|
||
|
|
datasource_name: "cat-standard-regulation-oversea"
|
||
|
|
data_dir: ""
|
||
|
|
base_path: "/app/run_tmp"
|
||
|
|
process_file_num: 0
|
||
|
|
process_file_last_modify: "2025-06-24 00:00:00"
|
||
|
|
chunk_size: 2048
|
||
|
|
token_overlap: 256
|
||
|
|
index_schemas:
|
||
|
|
- index_name: "index-catonline-chunk-oversea"
|
||
|
|
data_type: ["metadata", "document", "chunk"]
|
||
|
|
upload_batch_size: 50
|
||
|
|
# NOTE(review): unlike the `fields` lists of the "-prd" indexes in this file,
# this list contains neither "doc_metadata" nor "full_metadata_vector", although
# this index's full_metadata_vector_fields / vector_fields entries name them.
# Confirm whether the indexer adds those fields implicitly.
fields: ["filepath","timestamp","file_Name","file_Url","file_Type","entity_Attribute","standard_Id","standard_Code","standard_Title_Cn","standard_Title_En","domain_Name","standard_State_Name","type_Name","publish_Date","draft_type_name","qc_Abroad_Professional_Fields","applicable_Models","standard_Type_Name","technical_Field_Name","create_Time","update_Time","version_Id","version_Name","version_Parent_id","version_Parent_Name","technical_Series_No","implementation_Date","version_Publish_Date","newFlag_State","publish_Status_Text","implementation_Status_Text","new_Car_Implementation_Status_Text","production_Car_Implementation_Status_Text","production_Car_Implementation_Str"]
|
||
|
|
full_metadata_vector_fields: ["full_headers", "doc_metadata"]
|
||
|
|
semantic_config_name: "default"
|
||
|
|
vector_config_name: "vectorSearchProfile"
|
||
|
|
update_by_field: "filepath"
|
||
|
|
vector_fields:
|
||
|
|
- field: "contentVector"
|
||
|
|
append_fields: ["content"]
|
||
|
|
- field: "full_metadata_vector"
|
||
|
|
append_fields: ["full_headers", "doc_metadata"]
|
||
|
|
- index_name: "index-catonline-document-oversea"
|
||
|
|
data_type: ["document", "metadata"]
|
||
|
|
key_fields: ["filepath"]
|
||
|
|
upload_batch_size: 1
|
||
|
|
fields: ["filepath","timestamp","file_Name","file_Url","file_Type","entity_Attribute","standard_Id","standard_Code","standard_Title_Cn","standard_Title_En","domain_Name","standard_State_Name","type_Name","publish_Date","draft_type_name","qc_Abroad_Professional_Fields","applicable_Models","standard_Type_Name","technical_Field_Name","create_Time","update_Time","version_Id","version_Name","version_Parent_id","version_Parent_Name","technical_Series_No","implementation_Date","version_Publish_Date","newFlag_State","publish_Status_Text","implementation_Status_Text","new_Car_Implementation_Status_Text","production_Car_Implementation_Status_Text","production_Car_Implementation_Str"]
|
||
|
|
merge_content_fields: ["content"]
|
||
|
|
full_metadata_vector_fields: ["doc_metadata"]
|
||
|
|
semantic_config_name: "default"
|
||
|
|
vector_config_name: "vectorSearchProfile"
|
||
|
|
update_by_field: "filepath"
|
||
|
|
vector_fields:
|
||
|
|
- field: "full_metadata_vector"
|
||
|
|
append_fields: ["doc_metadata"]
|
||
|
|
- index_name: "index-catonline-standard-regulation-oversea"
|
||
|
|
data_type: ["metadata"]
|
||
|
|
key_fields: ["standard_Id"]
|
||
|
|
upload_batch_size: 1
|
||
|
|
fields: ["filepath","timestamp","file_Name","file_Url","file_Type","entity_Attribute","standard_Id","standard_Code","standard_Title_Cn","standard_Title_En","domain_Name","standard_State_Name","type_Name","publish_Date","draft_type_name","qc_Abroad_Professional_Fields","applicable_Models","standard_Type_Name","technical_Field_Name","create_Time","update_Time","version_Id","version_Name","version_Parent_id","version_Parent_Name","technical_Series_No","implementation_Date","version_Publish_Date","newFlag_State","publish_Status_Text","implementation_Status_Text","new_Car_Implementation_Status_Text","production_Car_Implementation_Status_Text","production_Car_Implementation_Str"]
|
||
|
|
vector_config_name: "vectorSearchProfile"
|
||
|
|
full_metadata_vector_fields: ["doc_metadata"]
|
||
|
|
semantic_config_name: "default"
|
||
|
|
update_by_field: "standard_Id"
|
||
|
|
vector_fields:
|
||
|
|
- field: "full_metadata_vector"
|
||
|
|
append_fields: ["doc_metadata"]
|
||
|
|
merge_fields:
|
||
|
|
- key: "doc_metadata"
|
||
|
|
# Entries are spelled exactly as in the index `fields` lists of this datasource;
# whitespace inside the quotes would become part of the field name.
fields: ["file_Name","entity_Attribute","standard_Code","standard_Title_Cn","standard_Title_En","domain_Name","standard_State_Name","type_Name","draft_type_name","qc_Abroad_Professional_Fields","applicable_Models","standard_Type_Name","technical_Field_Name","version_Name","version_Parent_Name","technical_Series_No","newFlag_State","publish_Status_Text","implementation_Status_Text","new_Car_Implementation_Status_Text","production_Car_Implementation_Status_Text","production_Car_Implementation_Str"]
|