Files
siemens_ragas/rag_eval/settings.py

74 lines
3.0 KiB
Python
Raw Normal View History

2026-06-12 14:02:15 +08:00
"""Runtime settings loaded from environment variables for evaluation runs."""
from __future__ import annotations
from pathlib import Path
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
# Parent of the directory that contains this module (symlinks resolved).
# The settings class in this module looks for its ".env" file here.
REPO_ROOT = Path(__file__).resolve().parents[1]
class EvaluationSettings(BaseSettings):
    """Application settings shared by the CLI, adapters, and metric pipeline.

    Each field is populated from the environment variable named by its
    ``alias``. Values may also come from the UTF-8 encoded ``.env`` file
    located at ``REPO_ROOT``. Keys that match no declared field are ignored
    rather than rejected.
    """

    model_config = SettingsConfigDict(
        env_file=REPO_ROOT / ".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- OpenAI-compatible API access (consumed by ``openai_client_kwargs``) ---
    openai_api_key: str | None = Field(default=None, alias="OPENAI_API_KEY")
    openai_base_url: str = Field(default="http://6.86.80.4:30080/v1", alias="OPENAI_BASE_URL")

    # --- Model selection ---
    # NOTE(review): presumably the judge LLM and embedding model used by the
    # RAGAS metrics; the consumers are not visible in this module.
    ragas_judge_model: str = Field(default="gpt-5", alias="RAGAS_JUDGE_MODEL")
    ragas_embedding_model: str = Field(
        default="text-embedding-3-small",
        alias="RAGAS_EMBEDDING_MODEL",
    )

    # --- Timeouts (seconds) and batching ---
    # ``openai_timeout_seconds`` feeds the client timeout below; where the
    # metric timeout and batch size are applied is not visible here.
    openai_timeout_seconds: float = Field(default=30.0, alias="OPENAI_TIMEOUT_SECONDS")
    ragas_metric_timeout_seconds: float = Field(default=45.0, alias="RAGAS_METRIC_TIMEOUT_SECONDS")
    batch_size: int = Field(default=8, alias="BATCH_SIZE")

    # --- Alibaba Cloud credentials and endpoint ---
    # NOTE(review): presumably used by the document-parsing adapter; confirm
    # against the caller.
    alibaba_access_key_id: str | None = Field(default=None, alias="ALIBABA_ACCESS_KEY_ID")
    alibaba_access_key_secret: str | None = Field(default=None, alias="ALIBABA_ACCESS_KEY_SECRET")
    alibaba_endpoint: str | None = Field(default=None, alias="ALIBABA_ENDPOINT")

    # --- Aliyun parse job tuning ---
    # Units follow the field names (seconds for interval/timeout); the exact
    # meaning of the layout step size and enhancement mode is defined by the
    # consuming adapter, which is not visible here.
    aliyun_parse_poll_interval_seconds: int = Field(
        default=5,
        alias="ALIYUN_PARSE_POLL_INTERVAL_SECONDS",
    )
    aliyun_parse_timeout_seconds: int = Field(
        default=600,
        alias="ALIYUN_PARSE_TIMEOUT_SECONDS",
    )
    aliyun_parse_layout_step_size: int = Field(
        default=50,
        alias="ALIYUN_PARSE_LAYOUT_STEP_SIZE",
    )
    aliyun_llm_enhancement: bool = Field(default=False, alias="ALIYUN_LLM_ENHANCEMENT")
    aliyun_enhancement_mode: str = Field(default="balanced", alias="ALIYUN_ENHANCEMENT_MODE")

    # --- Dataset building ---
    # NOTE(review): accepted values for ``parser_failure_mode`` are not
    # constrained here; verify against the code that reads it.
    document_parse_artifact_prefix: str = Field(
        default="outputs/dataset-builds",
        alias="DOCUMENT_PARSE_ARTIFACT_PREFIX",
    )
    parser_failure_mode: str = Field(default="fail", alias="PARSER_FAILURE_MODE")
    dataset_generator_model: str | None = Field(default=None, alias="DATASET_GENERATOR_MODEL")

    # --- Scoring API ---
    score_api_token: str | None = Field(
        default=None,
        alias="SCORE_API_TOKEN",
        description="Bearer token for /api/score endpoint. Empty = no auth.",
    )

    @property
    def openai_client_kwargs(self) -> dict[str, str | float]:
        """Return keyword arguments for the OpenAI client when credentials are available.

        Returns:
            An empty dict when no API key is configured (unset, empty, or
            whitespace only). Otherwise a dict containing ``api_key`` (with
            surrounding whitespace removed), ``timeout`` (never below
            1.0 second), and, when the configured base URL is not blank,
            ``base_url`` (also stripped).
        """
        # Strip the key the same way the base URL is stripped, so a blank or
        # padded value is not passed through as a credential.
        api_key = (self.openai_api_key or "").strip()
        if not api_key:
            return {}

        client_kwargs: dict[str, str | float] = {
            "api_key": api_key,
            # Clamp so a zero or negative setting cannot disable the timeout.
            "timeout": max(1.0, float(self.openai_timeout_seconds)),
        }

        base_url = self.openai_base_url.strip()
        if base_url:
            client_kwargs["base_url"] = base_url
        return client_kwargs