Compare commits
14 Commits
1bcb208f92
...
a781ba1e4a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a781ba1e4a | ||
|
|
2ad2c1ea9d | ||
|
|
f8e308b7dc | ||
|
|
fb420656ec | ||
|
|
05419db1f9 | ||
|
|
1dc7ab9727 | ||
|
|
7cc3aff95a | ||
|
|
ad2651ce27 | ||
|
|
fb42116616 | ||
|
|
a629bd516c | ||
|
|
ac410e7a5d | ||
|
|
1304fec1c4 | ||
|
|
5ced129ff7 | ||
|
|
ebf1fc7be8 |
@@ -8,8 +8,10 @@ OPENAI_BASE_URL=http://6.86.80.4:30080/v1
|
|||||||
OPENAI_TIMEOUT_SECONDS=180
|
OPENAI_TIMEOUT_SECONDS=180
|
||||||
|
|
||||||
# 默认评测模型(可在场景 YAML 或 Web 控制台 LLM 配置中覆盖)
|
# 默认评测模型(可在场景 YAML 或 Web 控制台 LLM 配置中覆盖)
|
||||||
RAGAS_JUDGE_MODEL=deepseek-v4-flash
|
# RAGAS_JUDGE_MODEL 需支持 max_tokens + json_object(gpt-5、gpt-4.1、gpt-4o 等)
|
||||||
RAGAS_EMBEDDING_MODEL=text-embedding-v3
|
# 注意:gpt-5.4/5.5/5.2 系列不支持 max_tokens,与 RAGAS 0.4.3 不兼容
|
||||||
|
RAGAS_JUDGE_MODEL=gpt-5
|
||||||
|
RAGAS_EMBEDDING_MODEL=text-embedding-3-small
|
||||||
|
|
||||||
# 评估并发控制(启用 7 个指标时建议 RAGAS_METRIC_TIMEOUT_SECONDS=300)
|
# 评估并发控制(启用 7 个指标时建议 RAGAS_METRIC_TIMEOUT_SECONDS=300)
|
||||||
BATCH_SIZE=8
|
BATCH_SIZE=8
|
||||||
|
|||||||
26
.gitattributes
vendored
Normal file
26
.gitattributes
vendored
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# 默认:文本文件使用 LF(Linux/macOS 风格)
|
||||||
|
* text=auto eol=lf
|
||||||
|
|
||||||
|
# Shell 脚本强制 LF,无论在哪个平台 checkout
|
||||||
|
*.sh text eol=lf
|
||||||
|
|
||||||
|
# Python 和 YAML 也用 LF
|
||||||
|
*.py text eol=lf
|
||||||
|
*.yaml text eol=lf
|
||||||
|
*.yml text eol=lf
|
||||||
|
*.md text eol=lf
|
||||||
|
*.json text eol=lf
|
||||||
|
*.toml text eol=lf
|
||||||
|
*.txt text eol=lf
|
||||||
|
*.env text eol=lf
|
||||||
|
*.env.example text eol=lf
|
||||||
|
|
||||||
|
# Windows 脚本保留 CRLF
|
||||||
|
*.ps1 text eol=crlf
|
||||||
|
*.bat text eol=crlf
|
||||||
|
|
||||||
|
# 二进制文件不转换
|
||||||
|
*.pdf binary
|
||||||
|
*.png binary
|
||||||
|
*.jpg binary
|
||||||
|
*.csv binary
|
||||||
@@ -17,3 +17,8 @@ dependencies = [
|
|||||||
"pydantic-settings>=2.14.1",
|
"pydantic-settings>=2.14.1",
|
||||||
"ragas==0.4.3",
|
"ragas==0.4.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[tool.setuptools.packages.find]
|
||||||
|
# 只打包源码目录,排除运行时产生的数据目录
|
||||||
|
include = ["rag_eval*", "apps*", "webapp*"]
|
||||||
|
exclude = ["logs*", "outputs*", "datasets*", "configs*", "scenarios*", "scripts*", "tests*"]
|
||||||
|
|||||||
@@ -21,9 +21,9 @@ class EvaluationSettings(BaseSettings):
|
|||||||
|
|
||||||
openai_api_key: str | None = Field(default=None, alias="OPENAI_API_KEY")
|
openai_api_key: str | None = Field(default=None, alias="OPENAI_API_KEY")
|
||||||
openai_base_url: str = Field(default="http://6.86.80.4:30080/v1", alias="OPENAI_BASE_URL")
|
openai_base_url: str = Field(default="http://6.86.80.4:30080/v1", alias="OPENAI_BASE_URL")
|
||||||
ragas_judge_model: str = Field(default="deepseek-v4-flash", alias="RAGAS_JUDGE_MODEL")
|
ragas_judge_model: str = Field(default="gpt-5", alias="RAGAS_JUDGE_MODEL")
|
||||||
ragas_embedding_model: str = Field(
|
ragas_embedding_model: str = Field(
|
||||||
default="text-embedding-v3",
|
default="text-embedding-3-small",
|
||||||
alias="RAGAS_EMBEDDING_MODEL",
|
alias="RAGAS_EMBEDDING_MODEL",
|
||||||
)
|
)
|
||||||
openai_timeout_seconds: float = Field(default=30.0, alias="OPENAI_TIMEOUT_SECONDS")
|
openai_timeout_seconds: float = Field(default=30.0, alias="OPENAI_TIMEOUT_SECONDS")
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
|
|
||||||
from webapp.models import (
|
from webapp.models import (
|
||||||
@@ -13,19 +15,23 @@ from webapp.services import scenario_scanner
|
|||||||
from webapp.services.task_manager import task_manager
|
from webapp.services.task_manager import task_manager
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/evaluations", tags=["evaluations"])
|
router = APIRouter(prefix="/api/evaluations", tags=["evaluations"])
|
||||||
|
logger = logging.getLogger("webapp.api.evaluations")
|
||||||
|
|
||||||
|
|
||||||
@router.post("", response_model=TriggerEvaluationResponse)
|
@router.post("", response_model=TriggerEvaluationResponse)
|
||||||
def trigger_evaluation(request: TriggerEvaluationRequest) -> TriggerEvaluationResponse:
|
def trigger_evaluation(request: TriggerEvaluationRequest) -> TriggerEvaluationResponse:
|
||||||
"""Validate the scenario path and queue a background evaluation task."""
|
"""Validate the scenario path and queue a background evaluation task."""
|
||||||
|
logger.info("[trigger] scenario=%s", request.scenario_path)
|
||||||
resolved = scenario_scanner.resolve_scenario_path(request.scenario_path)
|
resolved = scenario_scanner.resolve_scenario_path(request.scenario_path)
|
||||||
if resolved is None:
|
if resolved is None:
|
||||||
|
logger.warning("[trigger] invalid scenario path: %s", request.scenario_path)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
detail=f"无效或不允许的场景路径: {request.scenario_path}",
|
detail=f"无效或不允许的场景路径: {request.scenario_path}",
|
||||||
)
|
)
|
||||||
|
|
||||||
task_id = task_manager.submit(request.scenario_path)
|
task_id = task_manager.submit(request.scenario_path)
|
||||||
|
logger.info("[trigger] queued task_id=%s scenario=%s", task_id, request.scenario_path)
|
||||||
return TriggerEvaluationResponse(task_id=task_id)
|
return TriggerEvaluationResponse(task_id=task_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -34,11 +40,15 @@ def get_task_status(task_id: str) -> TaskStatus:
|
|||||||
"""Return the current status and logs for one evaluation task."""
|
"""Return the current status and logs for one evaluation task."""
|
||||||
status = task_manager.get(task_id)
|
status = task_manager.get(task_id)
|
||||||
if status is None:
|
if status is None:
|
||||||
|
logger.warning("[task_status] not found task_id=%s", task_id)
|
||||||
raise HTTPException(status_code=404, detail=f"未找到任务: {task_id}")
|
raise HTTPException(status_code=404, detail=f"未找到任务: {task_id}")
|
||||||
|
logger.debug("[task_status] task_id=%s status=%s", task_id, status.status)
|
||||||
return status
|
return status
|
||||||
|
|
||||||
|
|
||||||
@router.get("", response_model=dict)
|
@router.get("", response_model=dict)
|
||||||
def list_tasks() -> dict[str, list]:
|
def list_tasks() -> dict[str, list]:
|
||||||
"""Return all known evaluation tasks for this server session."""
|
"""Return all known evaluation tasks for this server session."""
|
||||||
return {"tasks": [task.model_dump() for task in task_manager.list_tasks()]}
|
tasks = task_manager.list_tasks()
|
||||||
|
logger.info("[list_tasks] count=%d", len(tasks))
|
||||||
|
return {"tasks": [task.model_dump() for task in tasks]}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
@@ -19,6 +20,19 @@ from webapp.services.profile_manager import profile_manager
|
|||||||
from webapp.services.yaml_patcher import apply_profiles_to_scenario
|
from webapp.services.yaml_patcher import apply_profiles_to_scenario
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/llm-profiles", tags=["llm-profiles"])
|
router = APIRouter(prefix="/api/llm-profiles", tags=["llm-profiles"])
|
||||||
|
logger = logging.getLogger("webapp.api.llm_profiles")
|
||||||
|
|
||||||
|
|
||||||
|
# 常见 embedding 模型名称关键词,用于自动判断走 /embeddings 端点
|
||||||
|
_EMBEDDING_MODEL_KEYWORDS = (
|
||||||
|
"embedding", "embed", "text-search", "text-similarity",
|
||||||
|
"code-search", "ada-002",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_embedding_model(model: str) -> bool:
|
||||||
|
"""Heuristic: return True if the model name looks like an embedding model."""
|
||||||
|
return any(kw in model.lower() for kw in _EMBEDDING_MODEL_KEYWORDS)
|
||||||
|
|
||||||
|
|
||||||
def _do_connectivity_test(
|
def _do_connectivity_test(
|
||||||
@@ -27,58 +41,102 @@ def _do_connectivity_test(
|
|||||||
api_key: str,
|
api_key: str,
|
||||||
timeout_seconds: int,
|
timeout_seconds: int,
|
||||||
) -> ProfileTestResponse:
|
) -> ProfileTestResponse:
|
||||||
"""Send a minimal chat completion request and return the test result."""
|
"""Send a minimal request and return the connectivity test result.
|
||||||
|
|
||||||
|
- Embedding models → POST /embeddings with a short text
|
||||||
|
- Chat models → POST /chat/completions, tries max_tokens first and falls back
|
||||||
|
to max_completion_tokens (required by newer models like gpt-5.x) if max_tokens is rejected.
|
||||||
|
"""
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=base_url.rstrip("/"),
|
base_url=base_url.rstrip("/"),
|
||||||
timeout=float(timeout_seconds),
|
timeout=float(timeout_seconds),
|
||||||
)
|
)
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
|
|
||||||
|
if _is_embedding_model(model):
|
||||||
|
# Embedding 模型走 /embeddings 端点
|
||||||
|
try:
|
||||||
|
client.embeddings.create(model=model, input="test")
|
||||||
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
return ProfileTestResponse(ok=True, message="连接成功(embedding)", latency_ms=latency_ms)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms)
|
||||||
|
|
||||||
|
# Chat 模型:先用 max_tokens=8 发起请求;超时/鉴权等其他错误直接返回
|
||||||
|
# 不使用 max_tokens=1,避免对部分模型(gpt-5.x)触发 min-output 限制
|
||||||
try:
|
try:
|
||||||
client.chat.completions.create(
|
client.chat.completions.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{"role": "user", "content": "hi"}],
|
messages=[{"role": "user", "content": "hi"}],
|
||||||
max_tokens=1,
|
max_tokens=8, # 足够小节省费用,同时满足各模型最小输出要求
|
||||||
)
|
)
|
||||||
latency_ms = int((time.monotonic() - t0) * 1000)
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
|
return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
|
err_str = str(exc)
|
||||||
|
# 如果 max_tokens 不被支持,改用 max_completion_tokens 再试一次
|
||||||
|
if "max_tokens" in err_str and "max_completion_tokens" in err_str:
|
||||||
|
try:
|
||||||
|
client.chat.completions.create(
|
||||||
|
model=model,
|
||||||
|
messages=[{"role": "user", "content": "hi"}],
|
||||||
|
max_completion_tokens=8,
|
||||||
|
)
|
||||||
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
|
||||||
|
except Exception as exc2: # noqa: BLE001
|
||||||
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
return ProfileTestResponse(ok=False, message=str(exc2), latency_ms=latency_ms)
|
||||||
latency_ms = int((time.monotonic() - t0) * 1000)
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms)
|
return ProfileTestResponse(ok=False, message=err_str, latency_ms=latency_ms)
|
||||||
|
|
||||||
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
return ProfileTestResponse(ok=False, message="连接测试失败", latency_ms=latency_ms)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/probe", response_model=ProfileTestResponse, tags=["llm-profiles"])
|
@router.post("/probe", response_model=ProfileTestResponse, tags=["llm-profiles"])
|
||||||
def probe_connectivity(request: ProfileProbeRequest) -> ProfileTestResponse:
|
def probe_connectivity(request: ProfileProbeRequest) -> ProfileTestResponse:
|
||||||
"""Test LLM connectivity with inline credentials (no saved profile required)."""
|
"""Test LLM connectivity with inline credentials (no saved profile required)."""
|
||||||
return _do_connectivity_test(
|
logger.info("[probe] model=%s base_url=%s", request.model, request.base_url)
|
||||||
|
result = _do_connectivity_test(
|
||||||
model=request.model,
|
model=request.model,
|
||||||
base_url=request.base_url,
|
base_url=request.base_url,
|
||||||
api_key=request.api_key,
|
api_key=request.api_key,
|
||||||
timeout_seconds=request.timeout_seconds,
|
timeout_seconds=request.timeout_seconds,
|
||||||
)
|
)
|
||||||
|
logger.info("[probe] ok=%s latency=%sms msg=%s", result.ok, result.latency_ms, result.message)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
@router.get("", response_model=dict)
|
@router.get("", response_model=dict)
|
||||||
def list_profiles() -> dict:
|
def list_profiles() -> dict:
|
||||||
"""Return all saved LLM profiles."""
|
"""Return all saved LLM profiles."""
|
||||||
return {"profiles": [p.model_dump() for p in profile_manager.list_all()]}
|
profiles = profile_manager.list_all()
|
||||||
|
logger.info("[list_profiles] count=%d", len(profiles))
|
||||||
|
return {"profiles": [p.model_dump() for p in profiles]}
|
||||||
|
|
||||||
|
|
||||||
@router.post("", status_code=201, response_model=LLMProfile)
|
@router.post("", status_code=201, response_model=LLMProfile)
|
||||||
def create_profile(request: CreateProfileRequest) -> LLMProfile:
|
def create_profile(request: CreateProfileRequest) -> LLMProfile:
|
||||||
"""Create a new LLM profile."""
|
"""Create a new LLM profile."""
|
||||||
return profile_manager.create(
|
logger.info("[create_profile] name=%r model=%s base_url=%s", request.name, request.model, request.base_url)
|
||||||
|
profile = profile_manager.create(
|
||||||
name=request.name,
|
name=request.name,
|
||||||
model=request.model,
|
model=request.model,
|
||||||
base_url=request.base_url,
|
base_url=request.base_url,
|
||||||
api_key=request.api_key,
|
api_key=request.api_key,
|
||||||
timeout_seconds=request.timeout_seconds,
|
timeout_seconds=request.timeout_seconds,
|
||||||
)
|
)
|
||||||
|
logger.info("[create_profile] created id=%s", profile.profile_id)
|
||||||
|
return profile
|
||||||
|
|
||||||
|
|
||||||
@router.put("/{profile_id}", response_model=LLMProfile)
|
@router.put("/{profile_id}", response_model=LLMProfile)
|
||||||
def update_profile(profile_id: str, request: CreateProfileRequest) -> LLMProfile:
|
def update_profile(profile_id: str, request: CreateProfileRequest) -> LLMProfile:
|
||||||
"""Update an existing LLM profile by id."""
|
"""Update an existing LLM profile by id."""
|
||||||
|
logger.info("[update_profile] id=%s name=%r model=%s", profile_id, request.name, request.model)
|
||||||
updated = profile_manager.update(
|
updated = profile_manager.update(
|
||||||
profile_id=profile_id,
|
profile_id=profile_id,
|
||||||
name=request.name,
|
name=request.name,
|
||||||
@@ -88,16 +146,21 @@ def update_profile(profile_id: str, request: CreateProfileRequest) -> LLMProfile
|
|||||||
timeout_seconds=request.timeout_seconds,
|
timeout_seconds=request.timeout_seconds,
|
||||||
)
|
)
|
||||||
if updated is None:
|
if updated is None:
|
||||||
|
logger.warning("[update_profile] not found id=%s", profile_id)
|
||||||
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
||||||
|
logger.info("[update_profile] updated id=%s", profile_id)
|
||||||
return updated
|
return updated
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/{profile_id}", response_model=dict)
|
@router.delete("/{profile_id}", response_model=dict)
|
||||||
def delete_profile(profile_id: str) -> dict:
|
def delete_profile(profile_id: str) -> dict:
|
||||||
"""Delete an LLM profile by id."""
|
"""Delete an LLM profile by id."""
|
||||||
|
logger.info("[delete_profile] id=%s", profile_id)
|
||||||
deleted = profile_manager.delete(profile_id)
|
deleted = profile_manager.delete(profile_id)
|
||||||
if not deleted:
|
if not deleted:
|
||||||
|
logger.warning("[delete_profile] not found id=%s", profile_id)
|
||||||
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
||||||
|
logger.info("[delete_profile] deleted id=%s", profile_id)
|
||||||
return {"deleted": True}
|
return {"deleted": True}
|
||||||
|
|
||||||
|
|
||||||
@@ -106,18 +169,31 @@ def test_profile(profile_id: str) -> ProfileTestResponse:
|
|||||||
"""Test LLM connectivity for a saved profile."""
|
"""Test LLM connectivity for a saved profile."""
|
||||||
profile = profile_manager.get(profile_id)
|
profile = profile_manager.get(profile_id)
|
||||||
if profile is None:
|
if profile is None:
|
||||||
|
logger.warning("[test_profile] not found id=%s", profile_id)
|
||||||
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
||||||
return _do_connectivity_test(
|
logger.info("[test_profile] id=%s model=%s base_url=%s", profile_id, profile.model, profile.base_url)
|
||||||
|
result = _do_connectivity_test(
|
||||||
model=profile.model,
|
model=profile.model,
|
||||||
base_url=profile.base_url,
|
base_url=profile.base_url,
|
||||||
api_key=profile.api_key,
|
api_key=profile.api_key,
|
||||||
timeout_seconds=profile.timeout_seconds,
|
timeout_seconds=profile.timeout_seconds,
|
||||||
)
|
)
|
||||||
|
logger.info("[test_profile] ok=%s latency=%sms", result.ok, result.latency_ms)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
@router.post("/apply", response_model=ProfileApplyResponse)
|
@router.post("/apply", response_model=ProfileApplyResponse)
|
||||||
def apply_profiles(request: ProfileApplyRequest) -> ProfileApplyResponse:
|
def apply_profiles(request: ProfileApplyRequest) -> ProfileApplyResponse:
|
||||||
"""Patch selected LLM profiles into the target scenario YAML file."""
|
"""Patch selected LLM profiles into the target scenario YAML file."""
|
||||||
|
logger.info(
|
||||||
|
"[apply_profiles] scenario=%s judge=%s answer=%s dataset=%s metric_weights=%s doc_weights=%s",
|
||||||
|
request.scenario_path,
|
||||||
|
request.judge_profile_id,
|
||||||
|
request.answer_profile_id,
|
||||||
|
request.dataset_profile_id,
|
||||||
|
bool(request.metric_weights),
|
||||||
|
bool(request.doc_weights),
|
||||||
|
)
|
||||||
role_profiles: dict[str, LLMProfile | None] = {
|
role_profiles: dict[str, LLMProfile | None] = {
|
||||||
"judge": profile_manager.get(request.judge_profile_id) if request.judge_profile_id else None,
|
"judge": profile_manager.get(request.judge_profile_id) if request.judge_profile_id else None,
|
||||||
"answer": profile_manager.get(request.answer_profile_id) if request.answer_profile_id else None,
|
"answer": profile_manager.get(request.answer_profile_id) if request.answer_profile_id else None,
|
||||||
@@ -135,6 +211,7 @@ def apply_profiles(request: ProfileApplyRequest) -> ProfileApplyResponse:
|
|||||||
]
|
]
|
||||||
|
|
||||||
if missing:
|
if missing:
|
||||||
|
logger.warning("[apply_profiles] missing profiles for roles: %s", missing)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
detail=f"Profile(s) not found for roles: {', '.join(missing)}",
|
detail=f"Profile(s) not found for roles: {', '.join(missing)}",
|
||||||
@@ -148,6 +225,7 @@ def apply_profiles(request: ProfileApplyRequest) -> ProfileApplyResponse:
|
|||||||
metric_weights=request.metric_weights,
|
metric_weights=request.metric_weights,
|
||||||
doc_weights=request.doc_weights,
|
doc_weights=request.doc_weights,
|
||||||
)
|
)
|
||||||
|
logger.info("[apply_profiles] patched fields: %s", patched)
|
||||||
return ProfileApplyResponse(
|
return ProfileApplyResponse(
|
||||||
scenario_path=request.scenario_path,
|
scenario_path=request.scenario_path,
|
||||||
patched_fields=patched,
|
patched_fields=patched,
|
||||||
|
|||||||
131
webapp/api/pipeline.py
Normal file
131
webapp/api/pipeline.py
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
"""Routes for the end-to-end pipeline API (document parse → build → eval)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
|
||||||
|
from webapp.models import (
|
||||||
|
PipelineJobRequest,
|
||||||
|
PipelineJobResponse,
|
||||||
|
PipelineJobStatus,
|
||||||
|
)
|
||||||
|
from webapp.services.pipeline_task_manager import pipeline_task_manager
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/pipeline", tags=["pipeline"])
|
||||||
|
logger = logging.getLogger("webapp.api.pipeline")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/jobs",
|
||||||
|
status_code=202,
|
||||||
|
response_model=PipelineJobResponse,
|
||||||
|
summary="提交全链路评估任务",
|
||||||
|
responses={
|
||||||
|
202: {
|
||||||
|
"description": "任务已成功排队,立即返回 job_id。",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"example": {
|
||||||
|
"job_id": "a1b2c3d4e5f6",
|
||||||
|
"job_name": "siemens-ct-eval-2026",
|
||||||
|
"status": "queued",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
422: {"description": "请求参数校验失败(docs_path 等必填字段缺失或格式错误)。"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
def submit_pipeline_job(request: PipelineJobRequest) -> PipelineJobResponse:
|
||||||
|
"""提交一个「解析文档 → 生成题库 → RAGAS 评估 → 输出报告」全链路任务。
|
||||||
|
|
||||||
|
任务在后台线程中异步执行,立即返回 `job_id`。
|
||||||
|
通过 `GET /api/pipeline/jobs/{job_id}` 轮询 `status` / `phase` / `logs`。
|
||||||
|
|
||||||
|
**Pipeline 执行阶段**:
|
||||||
|
1. `parsing_documents` — 调用阿里云 DocMind 解析每份 PDF
|
||||||
|
2. `generating_questions` — LLM 从文档片段生成草稿题库
|
||||||
|
3. `evaluating` — RAGAS 在线评测打分(answer_model 答题 + judge_model 评分)
|
||||||
|
4. `done` — 所有产物写入磁盘,`status` 变为 `completed`
|
||||||
|
"""
|
||||||
|
logger.info(
|
||||||
|
"[submit_pipeline] docs_path=%s job_name=%r gen_model=%s judge=%s max_docs=%s",
|
||||||
|
request.docs_path, request.job_name, request.generation_model,
|
||||||
|
request.judge_model, request.max_documents,
|
||||||
|
)
|
||||||
|
task = pipeline_task_manager.submit(request)
|
||||||
|
logger.info("[submit_pipeline] queued job_id=%s job_name=%s", task.job_id, task.job_name)
|
||||||
|
return PipelineJobResponse(
|
||||||
|
job_id=task.job_id,
|
||||||
|
job_name=task.job_name,
|
||||||
|
status=task.status,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/jobs/{job_id}",
|
||||||
|
response_model=PipelineJobStatus,
|
||||||
|
summary="查询任务状态",
|
||||||
|
responses={
|
||||||
|
200: {"description": "返回任务当前状态、执行阶段、日志及完成后的产物路径。"},
|
||||||
|
404: {"description": "指定 job_id 的任务不存在。"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
def get_pipeline_job(job_id: str) -> PipelineJobStatus:
|
||||||
|
"""查询一个 Pipeline 任务的当前状态、执行阶段、实时日志和结果。
|
||||||
|
|
||||||
|
**轮询建议**:每 3–5 秒查询一次,直到 `status` 为 `completed` 或 `failed`。
|
||||||
|
|
||||||
|
`result` 字段在任务完成后填充,包含:
|
||||||
|
- `scores_csv` — 每道题目逐项评分
|
||||||
|
- `summary_md` — 评估摘要 Markdown
|
||||||
|
- `dataset_csv` — 生成的题库 CSV
|
||||||
|
- `source_chunks_jsonl` — 文档片段索引
|
||||||
|
"""
|
||||||
|
status = pipeline_task_manager.get(job_id)
|
||||||
|
if status is None:
|
||||||
|
logger.warning("[get_pipeline_job] not found job_id=%s", job_id)
|
||||||
|
raise HTTPException(status_code=404, detail=f"Pipeline job not found: {job_id}")
|
||||||
|
logger.debug("[get_pipeline_job] job_id=%s status=%s phase=%s", job_id, status.status, status.phase)
|
||||||
|
return status
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/jobs",
|
||||||
|
response_model=dict,
|
||||||
|
summary="列出所有任务",
|
||||||
|
responses={
|
||||||
|
200: {
|
||||||
|
"description": "按创建时间倒序返回本次服务器会话中所有的 Pipeline 任务。",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"example": {
|
||||||
|
"jobs": [
|
||||||
|
{
|
||||||
|
"job_id": "a1b2c3d4e5f6",
|
||||||
|
"job_name": "siemens-ct-eval",
|
||||||
|
"status": "completed",
|
||||||
|
"phase": "done",
|
||||||
|
"logs": ["[build] 17 documents parsed", "..."],
|
||||||
|
"result": {
|
||||||
|
"total_questions": 19,
|
||||||
|
"eval_run_id": "2026-06-18T...",
|
||||||
|
"scores_csv": "outputs/pipeline/.../scores.csv",
|
||||||
|
"summary_md": "outputs/pipeline/.../summary.md",
|
||||||
|
},
|
||||||
|
"error": None,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
def list_pipeline_jobs() -> dict:
|
||||||
|
"""返回本次服务器会话中所有已提交的 Pipeline 任务,按创建时间倒序排列。"""
|
||||||
|
jobs = pipeline_task_manager.list_jobs()
|
||||||
|
logger.info("[list_pipeline_jobs] count=%d", len(jobs))
|
||||||
|
return {"jobs": [s.model_dump() for s in jobs]}
|
||||||
@@ -2,31 +2,42 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
|
|
||||||
from webapp.models import RunDetail
|
from webapp.models import RunDetail
|
||||||
from webapp.services import report_builder, run_reader
|
from webapp.services import report_builder, run_reader
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/runs", tags=["runs"])
|
router = APIRouter(prefix="/api/runs", tags=["runs"])
|
||||||
|
logger = logging.getLogger("webapp.api.runs")
|
||||||
|
|
||||||
|
|
||||||
@router.get("")
|
@router.get("")
|
||||||
def get_runs() -> dict[str, list]:
|
def get_runs() -> dict[str, list]:
|
||||||
"""Return summaries for every discoverable evaluation run."""
|
"""Return summaries for every discoverable evaluation run."""
|
||||||
summaries = run_reader.list_run_summaries()
|
summaries = run_reader.list_run_summaries()
|
||||||
|
logger.info("[get_runs] found %d runs", len(summaries))
|
||||||
return {"runs": [summary.model_dump() for summary in summaries]}
|
return {"runs": [summary.model_dump() for summary in summaries]}
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{run_id}")
|
@router.get("/{run_id}")
|
||||||
def get_run_detail(run_id: str) -> RunDetail:
|
def get_run_detail(run_id: str) -> RunDetail:
|
||||||
"""Return the full summary and aggregated report for one run."""
|
"""Return the full summary and aggregated report for one run."""
|
||||||
|
logger.info("[get_run_detail] run_id=%s", run_id)
|
||||||
run_dir = run_reader.find_run_dir(run_id)
|
run_dir = run_reader.find_run_dir(run_id)
|
||||||
if run_dir is None:
|
if run_dir is None:
|
||||||
|
logger.warning("[get_run_detail] not found run_id=%s", run_id)
|
||||||
raise HTTPException(status_code=404, detail=f"未找到运行: {run_id}")
|
raise HTTPException(status_code=404, detail=f"未找到运行: {run_id}")
|
||||||
|
|
||||||
summary = run_reader.build_run_summary(run_dir)
|
summary = run_reader.build_run_summary(run_dir)
|
||||||
if summary is None:
|
if summary is None:
|
||||||
|
logger.warning("[get_run_detail] missing metadata run_id=%s", run_id)
|
||||||
raise HTTPException(status_code=404, detail=f"运行元数据缺失: {run_id}")
|
raise HTTPException(status_code=404, detail=f"运行元数据缺失: {run_id}")
|
||||||
|
|
||||||
report = report_builder.build_report(run_dir, summary.metrics)
|
report = report_builder.build_report(run_dir, summary.metrics)
|
||||||
|
logger.info(
|
||||||
|
"[get_run_detail] ok run_id=%s metrics=%s valid=%d invalid=%d",
|
||||||
|
run_id, summary.metrics, summary.valid_samples, summary.invalid_samples,
|
||||||
|
)
|
||||||
return RunDetail(summary=summary, report=report)
|
return RunDetail(summary=summary, report=report)
|
||||||
|
|||||||
@@ -2,15 +2,20 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
|
|
||||||
from webapp.services import scenario_scanner
|
from webapp.services import scenario_scanner
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/scenarios", tags=["scenarios"])
|
router = APIRouter(prefix="/api/scenarios", tags=["scenarios"])
|
||||||
|
logger = logging.getLogger("webapp.api.scenarios")
|
||||||
|
|
||||||
|
|
||||||
@router.get("")
|
@router.get("")
|
||||||
def get_scenarios() -> dict[str, list]:
|
def get_scenarios() -> dict[str, list]:
|
||||||
"""Return every scenario file found under the scenarios/ directory."""
|
"""Return every scenario file found under the scenarios/ directory."""
|
||||||
scenarios = scenario_scanner.list_scenarios()
|
scenarios = scenario_scanner.list_scenarios()
|
||||||
|
valid = sum(1 for s in scenarios if not s.error)
|
||||||
|
logger.info("[get_scenarios] total=%d valid=%d errors=%d", len(scenarios), valid, len(scenarios) - valid)
|
||||||
return {"scenarios": [item.model_dump() for item in scenarios]}
|
return {"scenarios": [item.model_dump() for item in scenarios]}
|
||||||
|
|||||||
@@ -2,10 +2,13 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
from fastapi import APIRouter, Header, HTTPException
|
from fastapi import APIRouter, Header, HTTPException, Request
|
||||||
|
from fastapi.exceptions import RequestValidationError
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
from rag_eval.metrics.weights import compute_weighted_score
|
from rag_eval.metrics.weights import compute_weighted_score
|
||||||
from rag_eval.settings import EvaluationSettings
|
from rag_eval.settings import EvaluationSettings
|
||||||
@@ -13,6 +16,7 @@ from webapp.models import ScoreRequest, ScoreResponse
|
|||||||
from webapp.services.inline_scorer import inline_scorer
|
from webapp.services.inline_scorer import inline_scorer
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/score", tags=["score"])
|
router = APIRouter(prefix="/api/score", tags=["score"])
|
||||||
|
logger = logging.getLogger("webapp.api.score")
|
||||||
|
|
||||||
|
|
||||||
def _get_settings() -> EvaluationSettings:
|
def _get_settings() -> EvaluationSettings:
|
||||||
@@ -34,16 +38,74 @@ def _check_auth(authorization: str | None, token: str) -> None:
|
|||||||
response_model=ScoreResponse,
|
response_model=ScoreResponse,
|
||||||
summary="单题实时评分(Dify 外部 Tool)",
|
summary="单题实时评分(Dify 外部 Tool)",
|
||||||
responses={
|
responses={
|
||||||
200: {"description": "各指标得分和加权综合得分。"},
|
200: {
|
||||||
|
"description": "各指标得分、加权综合得分及耗时。",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"example": {
|
||||||
|
"scores": {
|
||||||
|
"faithfulness": 0.875,
|
||||||
|
"answer_relevancy": 0.920,
|
||||||
|
"context_recall": 0.810,
|
||||||
|
"context_precision": 0.850,
|
||||||
|
},
|
||||||
|
"weighted_score": 0.8638,
|
||||||
|
"latency_ms": 3420,
|
||||||
|
"skipped_metrics": [],
|
||||||
|
"error": None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
401: {"description": "配置了 SCORE_API_TOKEN 但未提供有效 Bearer token。"},
|
401: {"description": "配置了 SCORE_API_TOKEN 但未提供有效 Bearer token。"},
|
||||||
422: {"description": "请求参数校验失败。"},
|
422: {"description": "请求参数校验失败(必填字段缺失或 metrics 名称不合法)。"},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
def score_sample(
|
def score_sample(
|
||||||
|
raw_request: Request,
|
||||||
request: ScoreRequest,
|
request: ScoreRequest,
|
||||||
authorization: Annotated[str | None, Header()] = None,
|
authorization: Annotated[str | None, Header()] = None,
|
||||||
) -> ScoreResponse:
|
) -> ScoreResponse:
|
||||||
"""Accept one QA sample, run RAGAS metrics synchronously, and return scores."""
|
"""接受单条问答记录,同步运行 RAGAS 指标打分,实时返回各指标得分。
|
||||||
|
|
||||||
|
**主要用途**:供 Dify 外部 Tool 调用。Dify Agent 在生成回答后,将
|
||||||
|
`(question, answer, contexts)` 发送到此端点,即可获得 RAGAS 质量评分,
|
||||||
|
用于日志记录、质量监控或触发 Agent 自我改进流程。
|
||||||
|
|
||||||
|
**contexts 格式**:多个检索片段用 `context_separator`(默认 `" |||| "`)拼接为一个字符串,
|
||||||
|
服务端自动拆分后传入 RAGAS 管道。
|
||||||
|
|
||||||
|
**ground_truth 可选**:
|
||||||
|
- 提供时:所有指定指标均参与计算。
|
||||||
|
- 缺失时:自动跳过依赖参考答案的指标(`context_recall`、
|
||||||
|
`factual_correctness`、`semantic_similarity`、`noise_sensitivity`),
|
||||||
|
跳过的指标在响应的 `skipped_metrics` 列表中列出,对应 `scores` 值为 `null`。
|
||||||
|
|
||||||
|
**支持的 RAGAS 指标**:
|
||||||
|
- `faithfulness` — 回答与检索片段的事实一致性
|
||||||
|
- `answer_relevancy` — 回答与问题的相关性
|
||||||
|
- `context_recall` — 参考答案覆盖到的检索内容比例(需 ground_truth)
|
||||||
|
- `context_precision` — 检索片段中与答案相关的部分占比
|
||||||
|
- `noise_sensitivity` — 对无关噪声片段的敏感度(需 ground_truth)
|
||||||
|
- `factual_correctness` — 回答与参考答案的事实准确性(需 ground_truth)
|
||||||
|
- `semantic_similarity` — 回答与参考答案的语义相似度(需 ground_truth)
|
||||||
|
|
||||||
|
**推荐模型配置**:
|
||||||
|
- `judge_model`: `gpt-5`
|
||||||
|
- `embedding_model`: `text-embedding-3-small`
|
||||||
|
|
||||||
|
**鉴权**:若 `.env` 中配置了 `SCORE_API_TOKEN`,需在请求头携带
|
||||||
|
`Authorization: Bearer <token>`;留空则无需鉴权(适合内网部署)。
|
||||||
|
"""
|
||||||
|
client = f"{raw_request.client.host}:{raw_request.client.port}" if raw_request.client else "unknown"
|
||||||
|
logger.info(
|
||||||
|
"[score] incoming client=%s method=%s content_type=%s metrics=%s has_gt=%s",
|
||||||
|
client,
|
||||||
|
raw_request.method,
|
||||||
|
raw_request.headers.get("content-type", ""),
|
||||||
|
request.metrics,
|
||||||
|
request.ground_truth is not None,
|
||||||
|
)
|
||||||
settings = _get_settings()
|
settings = _get_settings()
|
||||||
|
|
||||||
# Require Bearer auth only when the deployment configured a shared token.
|
# Require Bearer auth only when the deployment configured a shared token.
|
||||||
@@ -97,6 +159,12 @@ def score_sample(
|
|||||||
{},
|
{},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[score] done latency=%dms skipped=%s scores=%s",
|
||||||
|
latency_ms,
|
||||||
|
skipped,
|
||||||
|
{k: (round(v, 4) if v is not None else None) for k, v in all_scores.items()},
|
||||||
|
)
|
||||||
return ScoreResponse(
|
return ScoreResponse(
|
||||||
scores=all_scores,
|
scores=all_scores,
|
||||||
weighted_score=round(weighted, 4) if weighted is not None else None,
|
weighted_score=round(weighted, 4) if weighted is not None else None,
|
||||||
|
|||||||
@@ -408,26 +408,21 @@ class ScoreRequest(BaseModel):
|
|||||||
|
|
||||||
model_config = ConfigDict(
|
model_config = ConfigDict(
|
||||||
json_schema_extra={
|
json_schema_extra={
|
||||||
"examples": [
|
"example": {
|
||||||
{
|
"question": "双源CT的时间分辨率是多少?",
|
||||||
"summary": "基础评分请求",
|
"answer": "双源CT的单扇区时间分辨率为75ms。",
|
||||||
"value": {
|
"contexts": "双源CT采用两套管-探测器系统 |||| 单扇区采集旋转135度",
|
||||||
"question": "双源CT的时间分辨率是多少?",
|
"ground_truth": "双源CT单扇区时间分辨率为75ms,需旋转135度。",
|
||||||
"answer": "双源CT的单扇区时间分辨率为75ms。",
|
"context_separator": " |||| ",
|
||||||
"contexts": "双源CT采用两套管-探测器系统 |||| 单扇区采集旋转135度",
|
"metrics": [
|
||||||
"ground_truth": "双源CT单扇区时间分辨率为75ms,需旋转135度。",
|
"faithfulness",
|
||||||
"context_separator": " |||| ",
|
"answer_relevancy",
|
||||||
"metrics": [
|
"context_recall",
|
||||||
"faithfulness",
|
"context_precision",
|
||||||
"answer_relevancy",
|
],
|
||||||
"context_recall",
|
"judge_model": "gpt-5",
|
||||||
"context_precision",
|
"embedding_model": "text-embedding-3-small",
|
||||||
],
|
}
|
||||||
"judge_model": "deepseek-v4-flash",
|
|
||||||
"embedding_model": "text-embedding-v3",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -7,15 +7,21 @@ the server starts even when the evaluation dependencies are not yet installed.
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI, Request
|
||||||
from fastapi.responses import FileResponse
|
from fastapi.encoders import jsonable_encoder
|
||||||
|
from fastapi.exceptions import RequestValidationError
|
||||||
|
from fastapi.responses import FileResponse, JSONResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
from webapp.api import evaluations, llm_profiles, pipeline, runs, scenarios, score
|
from webapp.api import evaluations, llm_profiles, pipeline, runs, scenarios, score
|
||||||
|
|
||||||
STATIC_DIR = Path(__file__).resolve().parent / "static"
|
STATIC_DIR = Path(__file__).resolve().parent / "static"
|
||||||
|
logger = logging.getLogger("webapp.server")
|
||||||
|
access_logger = logging.getLogger("webapp.access")
|
||||||
|
|
||||||
# OpenAPI tag metadata — controls the grouping and descriptions in /docs.
|
# OpenAPI tag metadata — controls the grouping and descriptions in /docs.
|
||||||
OPENAPI_TAGS = [
|
OPENAPI_TAGS = [
|
||||||
@@ -92,7 +98,7 @@ def create_app() -> FastAPI:
|
|||||||
"- **报告 API** — 查询历史运行记录与评估报告\n\n"
|
"- **报告 API** — 查询历史运行记录与评估报告\n\n"
|
||||||
"> **快速开始**:调用 `POST /api/pipeline/jobs` 传入 PDF 文件夹路径即可启动完整评估流程。"
|
"> **快速开始**:调用 `POST /api/pipeline/jobs` 传入 PDF 文件夹路径即可启动完整评估流程。"
|
||||||
),
|
),
|
||||||
version="0.2.0",
|
version="0.3.0",
|
||||||
openapi_tags=OPENAPI_TAGS,
|
openapi_tags=OPENAPI_TAGS,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -103,6 +109,39 @@ def create_app() -> FastAPI:
|
|||||||
app.include_router(pipeline.router)
|
app.include_router(pipeline.router)
|
||||||
app.include_router(score.router)
|
app.include_router(score.router)
|
||||||
|
|
||||||
|
@app.middleware("http")
async def access_log_middleware(request: Request, call_next):
    """Write one access-log line per request: method, path, status, latency.

    Requests for ``/``, ``/favicon.ico`` and anything under ``/static/`` are
    logged at DEBUG; every other request is logged at INFO.
    """
    started = time.monotonic()
    response = await call_next(request)
    # Whole milliseconds, truncated rather than rounded.
    elapsed_ms = int((time.monotonic() - started) * 1000)

    url_path = request.url.path
    if url_path.startswith("/static/") or url_path in ("/", "/favicon.ico"):
        emit = access_logger.debug
    else:
        emit = access_logger.info

    emit("%s %s → %d (%dms)", request.method, url_path, response.status_code, elapsed_ms)
    return response
|
||||||
|
|
||||||
|
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError) -> JSONResponse:
    """Log a request-validation failure in full, then answer with HTTP 422.

    The response body is ``{"detail": <errors>}`` where ``<errors>`` is the
    JSON-encodable form of ``exc.errors()``; the same value is written to the
    log together with the request path and its Content-Type header.
    """
    detail = jsonable_encoder(exc.errors())
    content_type = request.headers.get("content-type", "")
    logger.warning(
        "[422] validation error url=%s content_type=%s errors=%s",
        request.url.path,
        content_type,
        detail,
    )
    payload = {"detail": detail}
    return JSONResponse(status_code=422, content=payload)
|
||||||
|
|
||||||
@app.get("/api/health", tags=["meta"])
|
@app.get("/api/health", tags=["meta"])
|
||||||
def health() -> dict[str, str]:
|
def health() -> dict[str, str]:
|
||||||
"""Report basic liveness so the UI can confirm the server is reachable."""
|
"""Report basic liveness so the UI can confirm the server is reachable."""
|
||||||
|
|||||||
@@ -294,6 +294,21 @@ table.group-table td { border-bottom: 1px solid #f1f5f9; font-variant-numeric: t
|
|||||||
.btn-sm { padding: 4px 10px; font-size: 12px; }
|
.btn-sm { padding: 4px 10px; font-size: 12px; }
|
||||||
.btn-danger { color: var(--bad); border-color: var(--bad); }
|
.btn-danger { color: var(--bad); border-color: var(--bad); }
|
||||||
.btn-danger:hover { background: #fee2e2; }
|
.btn-danger:hover { background: #fee2e2; }
|
||||||
|
.btn-test { color: #0369a1; border-color: #0369a1; }
|
||||||
|
.btn-test:hover { background: #e0f2fe; }
|
||||||
|
|
||||||
|
/* LLM 连通性测试结果 */
|
||||||
|
.profile-test-result {
|
||||||
|
margin-top: 8px;
|
||||||
|
padding: 6px 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 500;
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
.profile-test-result:not([hidden]) { display: block; }
|
||||||
|
.profile-test-result.ok { background: #dcfce7; color: #166534; border: 1px solid #bbf7d0; }
|
||||||
|
.profile-test-result.fail { background: #fee2e2; color: #991b1b; border: 1px solid #fecaca; word-break: break-all; }
|
||||||
|
|
||||||
/* 选中态 run 卡片 */
|
/* 选中态 run 卡片 */
|
||||||
.run-card.selected {
|
.run-card.selected {
|
||||||
@@ -310,6 +325,7 @@ table.group-table td { border-bottom: 1px solid #f1f5f9; font-variant-numeric: t
|
|||||||
|
|
||||||
/* ---------- API 文档 iframe ---------- */
|
/* ---------- API 文档 iframe ---------- */
|
||||||
#view-apidocs { padding: 0; display: flex; flex-direction: column; flex: 1; }
|
#view-apidocs { padding: 0; display: flex; flex-direction: column; flex: 1; }
|
||||||
|
#view-apidocs[hidden] { display: none; }
|
||||||
.apidocs-frame {
|
.apidocs-frame {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
@@ -404,6 +420,7 @@ table.group-table td { border-bottom: 1px solid #f1f5f9; font-variant-numeric: t
|
|||||||
.app { display: block; }
|
.app { display: block; }
|
||||||
.main { display: block; width: 100%; }
|
.main { display: block; width: 100%; }
|
||||||
.view { padding: 0; display: block !important; }
|
.view { padding: 0; display: block !important; }
|
||||||
|
#view-apidocs { display: none !important; } /* never print the API docs iframe */
|
||||||
#view-report { display: block !important; }
|
#view-report { display: block !important; }
|
||||||
|
|
||||||
/* ── 报告内容 ── */
|
/* ── 报告内容 ── */
|
||||||
|
|||||||
@@ -219,9 +219,11 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="form-actions">
|
<div class="form-actions">
|
||||||
<button class="btn btn-primary" id="save-profile-btn">保存</button>
|
<button class="btn btn-primary" id="save-profile-btn">保存</button>
|
||||||
|
<button class="btn btn-test" id="test-profile-btn">测试连通性</button>
|
||||||
<button class="btn" id="cancel-profile-btn">取消</button>
|
<button class="btn" id="cancel-profile-btn">取消</button>
|
||||||
<span class="form-error muted" id="profile-form-error"></span>
|
<span class="form-error muted" id="profile-form-error"></span>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="profile-test-result" id="profile-form-test-result" hidden></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -65,4 +65,16 @@ const API = {
|
|||||||
});
|
});
|
||||||
},
|
},
|
||||||
applyProfiles(body) { return API.post("/api/llm-profiles/apply", body); },
|
applyProfiles(body) { return API.post("/api/llm-profiles/apply", body); },
|
||||||
|
|
||||||
|
// 测试已保存 profile 的连通性
|
||||||
|
testProfile(id) {
|
||||||
|
return fetch(`/api/llm-profiles/${encodeURIComponent(id)}/test`, { method: "POST" })
|
||||||
|
.then(async r => {
|
||||||
|
if (!r.ok) { const d = await API._extractError(r); throw new Error(d); }
|
||||||
|
return r.json();
|
||||||
|
});
|
||||||
|
},
|
||||||
|
|
||||||
|
// 测试表单中填写的内联参数(保存前即可测试)
|
||||||
|
probeConnectivity(body) { return API.post("/api/llm-profiles/probe", body); },
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ const Profiles = {
|
|||||||
document.getElementById("add-profile-btn").addEventListener("click", () => Profiles.showForm());
|
document.getElementById("add-profile-btn").addEventListener("click", () => Profiles.showForm());
|
||||||
document.getElementById("save-profile-btn").addEventListener("click", () => Profiles.save());
|
document.getElementById("save-profile-btn").addEventListener("click", () => Profiles.save());
|
||||||
document.getElementById("cancel-profile-btn").addEventListener("click", () => Profiles.hideForm());
|
document.getElementById("cancel-profile-btn").addEventListener("click", () => Profiles.hideForm());
|
||||||
|
document.getElementById("test-profile-btn").addEventListener("click", () => Profiles.testForm());
|
||||||
},
|
},
|
||||||
|
|
||||||
// 加载并渲染 Profile 列表
|
// 加载并渲染 Profile 列表
|
||||||
@@ -39,6 +40,7 @@ const Profiles = {
|
|||||||
<div class="profile-card-head">
|
<div class="profile-card-head">
|
||||||
<div class="profile-card-name">${App.escape(p.name)}</div>
|
<div class="profile-card-name">${App.escape(p.name)}</div>
|
||||||
<div class="profile-card-actions">
|
<div class="profile-card-actions">
|
||||||
|
<button class="btn btn-sm btn-test" data-action="test">测试</button>
|
||||||
<button class="btn btn-sm" data-action="edit">编辑</button>
|
<button class="btn btn-sm" data-action="edit">编辑</button>
|
||||||
<button class="btn btn-sm btn-danger" data-action="delete">删除</button>
|
<button class="btn btn-sm btn-danger" data-action="delete">删除</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -46,12 +48,72 @@ const Profiles = {
|
|||||||
<div class="profile-card-field"><span class="field-label">模型</span> <code>${App.escape(p.model)}</code></div>
|
<div class="profile-card-field"><span class="field-label">模型</span> <code>${App.escape(p.model)}</code></div>
|
||||||
<div class="profile-card-field"><span class="field-label">Base URL</span> <code>${App.escape(p.base_url)}</code></div>
|
<div class="profile-card-field"><span class="field-label">Base URL</span> <code>${App.escape(p.base_url)}</code></div>
|
||||||
<div class="profile-card-field"><span class="field-label">超时</span> ${p.timeout_seconds}s</div>
|
<div class="profile-card-field"><span class="field-label">超时</span> ${p.timeout_seconds}s</div>
|
||||||
|
<div class="profile-test-result" data-result hidden></div>
|
||||||
`;
|
`;
|
||||||
|
card.querySelector("[data-action=test]").addEventListener("click", () => Profiles.testCard(p, card));
|
||||||
card.querySelector("[data-action=edit]").addEventListener("click", () => Profiles.showForm(p));
|
card.querySelector("[data-action=edit]").addEventListener("click", () => Profiles.showForm(p));
|
||||||
card.querySelector("[data-action=delete]").addEventListener("click", () => Profiles.remove(p.profile_id, p.name));
|
card.querySelector("[data-action=delete]").addEventListener("click", () => Profiles.remove(p.profile_id, p.name));
|
||||||
return card;
|
return card;
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// 测试已保存的 profile(卡片上的测试按钮)
|
||||||
|
async testCard(p, card) {
|
||||||
|
const btn = card.querySelector("[data-action=test]");
|
||||||
|
const resultEl = card.querySelector("[data-result]");
|
||||||
|
btn.disabled = true;
|
||||||
|
btn.textContent = "测试中…";
|
||||||
|
resultEl.hidden = true;
|
||||||
|
resultEl.className = "profile-test-result";
|
||||||
|
try {
|
||||||
|
const res = await API.testProfile(p.profile_id);
|
||||||
|
Profiles._showTestResult(resultEl, res);
|
||||||
|
} catch (err) {
|
||||||
|
Profiles._showTestResult(resultEl, { ok: false, message: err.message });
|
||||||
|
} finally {
|
||||||
|
btn.disabled = false;
|
||||||
|
btn.textContent = "测试";
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
// 测试表单中当前填写的参数(保存前即可测试)
|
||||||
|
async testForm() {
|
||||||
|
const body = {
|
||||||
|
model: document.getElementById("pf-model").value.trim(),
|
||||||
|
base_url: document.getElementById("pf-base-url").value.trim(),
|
||||||
|
api_key: document.getElementById("pf-api-key").value.trim(),
|
||||||
|
timeout_seconds: parseInt(document.getElementById("pf-timeout").value, 10) || 30,
|
||||||
|
};
|
||||||
|
const errEl = document.getElementById("profile-form-error");
|
||||||
|
if (!body.model || !body.base_url || !body.api_key) {
|
||||||
|
errEl.textContent = "请先填写模型名称、Base URL 和 API Key";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
errEl.textContent = "";
|
||||||
|
const testBtn = document.getElementById("test-profile-btn");
|
||||||
|
const resultEl = document.getElementById("profile-form-test-result");
|
||||||
|
testBtn.disabled = true;
|
||||||
|
testBtn.textContent = "测试中…";
|
||||||
|
resultEl.hidden = true;
|
||||||
|
resultEl.className = "profile-test-result";
|
||||||
|
try {
|
||||||
|
const res = await API.probeConnectivity(body);
|
||||||
|
Profiles._showTestResult(resultEl, res);
|
||||||
|
} catch (err) {
|
||||||
|
Profiles._showTestResult(resultEl, { ok: false, message: err.message });
|
||||||
|
} finally {
|
||||||
|
testBtn.disabled = false;
|
||||||
|
testBtn.textContent = "测试连通性";
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
// 渲染测试结果到指定元素
|
||||||
|
_showTestResult(el, res) {
|
||||||
|
el.hidden = false;
|
||||||
|
el.classList.add(res.ok ? "ok" : "fail");
|
||||||
|
const latency = res.latency_ms != null ? ` (${res.latency_ms}ms)` : "";
|
||||||
|
el.textContent = res.ok ? `✓ 连接成功${latency}` : `✗ ${res.message}`;
|
||||||
|
},
|
||||||
|
|
||||||
// 显示新建或编辑表单
|
// 显示新建或编辑表单
|
||||||
showForm(profile = null) {
|
showForm(profile = null) {
|
||||||
const panel = document.getElementById("profile-form-panel");
|
const panel = document.getElementById("profile-form-panel");
|
||||||
@@ -65,6 +127,9 @@ const Profiles = {
|
|||||||
document.getElementById("pf-api-key").value = profile ? profile.api_key : "";
|
document.getElementById("pf-api-key").value = profile ? profile.api_key : "";
|
||||||
document.getElementById("pf-timeout").value = profile ? profile.timeout_seconds : 30;
|
document.getElementById("pf-timeout").value = profile ? profile.timeout_seconds : 30;
|
||||||
document.getElementById("profile-form-error").textContent = "";
|
document.getElementById("profile-form-error").textContent = "";
|
||||||
|
const resultEl = document.getElementById("profile-form-test-result");
|
||||||
|
resultEl.hidden = true;
|
||||||
|
resultEl.className = "profile-test-result";
|
||||||
panel.scrollIntoView({ behavior: "smooth", block: "start" });
|
panel.scrollIntoView({ behavior: "smooth", block: "start" });
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
99
webmain.py
99
webmain.py
@@ -5,13 +5,73 @@ and the same runs/ artifacts. Example:
|
|||||||
|
|
||||||
python webmain.py
|
python webmain.py
|
||||||
python webmain.py --host 0.0.0.0 --port 8800
|
python webmain.py --host 0.0.0.0 --port 8800
|
||||||
|
python webmain.py --host 0.0.0.0 --port 8800 --log-level debug
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import logging
|
||||||
|
import logging.config
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import uvicorn
|
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parent
|
||||||
|
|
||||||
|
|
||||||
|
def _build_log_config(log_file: Path, level: str) -> dict:
    """Build a ``logging.config.dictConfig`` dictionary that uvicorn also accepts.

    Two handlers are configured:

    * ``console`` writes to stderr and only lets through records at ``level``
      or above.
    * ``file`` is a size-based ``RotatingFileHandler`` (50 MB per file, 7
      backups) that accepts everything from DEBUG up.

    The ``webapp`` and ``rag_eval`` loggers are opened at DEBUG so the file
    handler really receives debug records; the console threshold is enforced
    by the console handler alone. Both loggers, like the uvicorn ones, carry
    their own handlers and do not propagate to root. The root logger stays at
    WARNING for everything else.

    Args:
        log_file: Path of the file the rotating handler writes to.
        level: Console log level name in any letter case (e.g. ``"info"``).

    Returns:
        The logging configuration dictionary (schema version 1).
    """
    console_level = level.upper()

    def _logger(logger_level: str) -> dict:
        # Fresh handler list per logger so the entries never share state.
        return {"handlers": ["console", "file"], "level": logger_level, "propagate": False}

    return {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "detailed": {
                "format": "%(asctime)s %(levelname)-8s %(name)s %(message)s",
                "datefmt": "%Y-%m-%d %H:%M:%S",
            },
            "console": {
                "format": "%(asctime)s %(levelname)-8s %(name)-30s %(message)s",
                "datefmt": "%H:%M:%S",
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "stream": "ext://sys.stderr",
                "formatter": "console",
                "level": console_level,
            },
            "file": {
                "class": "logging.handlers.RotatingFileHandler",
                "filename": str(log_file),
                "maxBytes": 50 * 1024 * 1024,  # 50 MB per file
                "backupCount": 7,  # keep 7 rotated files
                "encoding": "utf-8",
                "formatter": "detailed",
                "level": "DEBUG",  # file always captures everything
            },
        },
        "loggers": {
            # Application loggers: a logger's own level is checked before any
            # handler sees the record, so these must stay at DEBUG for the
            # file handler to capture debug output. The console handler's
            # level does the console filtering.
            "webapp": _logger("DEBUG"),
            "rag_eval": _logger("DEBUG"),
            # uvicorn loggers: written to both console and file at INFO.
            "uvicorn.access": _logger("INFO"),
            "uvicorn.error": _logger("INFO"),
            "uvicorn": _logger("INFO"),
        },
        "root": {
            "handlers": ["console", "file"],
            "level": "WARNING",  # third-party libraries are only logged from WARNING up
        },
    }
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
def parse_args() -> argparse.Namespace:
|
||||||
@@ -24,17 +84,52 @@ def parse_args() -> argparse.Namespace:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Enable auto-reload for local development.",
|
help="Enable auto-reload for local development.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--log-level",
|
||||||
|
default="info",
|
||||||
|
choices=["debug", "info", "warning", "error"],
|
||||||
|
help="Console log level (default: info). File always captures DEBUG.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--log-file",
|
||||||
|
default=None,
|
||||||
|
help="Log file path (default: logs/server_YYYY-MM-DD.log).",
|
||||||
|
)
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Configure logging, then start uvicorn with the web application.

    The log file is ``--log-file`` when given, otherwise
    ``logs/server_YYYY-MM-DD.log`` under the repository root. The same logging
    configuration is applied locally and handed to uvicorn.
    """
    import uvicorn

    args = parse_args()

    # Resolve the log file path.
    logs_dir = REPO_ROOT / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)
    if args.log_file:
        log_file = Path(args.log_file)
        # The file handler opens its file while the config is applied and
        # fails if the target directory is missing, so create it up front.
        log_file.parent.mkdir(parents=True, exist_ok=True)
    else:
        date_str = datetime.now().strftime("%Y-%m-%d")
        log_file = logs_dir / f"server_{date_str}.log"

    log_config = _build_log_config(log_file, args.log_level)

    # Apply the config before uvicorn starts so our loggers are ready immediately.
    logging.config.dictConfig(log_config)

    logger = logging.getLogger("webapp.server")
    logger.info(
        "Starting RAGAS Console host=%s port=%d log_level=%s log_file=%s",
        args.host, args.port, args.log_level, log_file,
    )

    uvicorn.run(
        "webapp.server:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
        log_config=log_config,  # hand our config to uvicorn so it uses the same handlers
    )
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user