# siemens_ragas/tests/test_pipeline.py

"""Tests for the end-to-end pipeline API and pipeline task manager."""

from __future__ import annotations

import json
import time
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
from fastapi.testclient import TestClient


# ── fixtures ──────────────────────────────────────────────────────────────────

@pytest.fixture()
def client(tmp_path, monkeypatch):
    """Provide a TestClient wired to an isolated PipelineTaskManager.

    A new two-worker manager replaces ``pipeline_task_manager`` in both the
    service module and the API module, and the service module's
    ``_PIPELINE_OUTPUT_ROOT`` is pointed at ``tmp_path / "pipeline"``.
    All replacements go through ``monkeypatch``.
    """
    import webapp.services.pipeline_task_manager as manager_module

    isolated_manager = manager_module.PipelineTaskManager(max_workers=2)
    output_root = tmp_path / "pipeline"
    monkeypatch.setattr(manager_module, "pipeline_task_manager", isolated_manager)
    monkeypatch.setattr(manager_module, "_PIPELINE_OUTPUT_ROOT", output_root)

    # The API module holds its own reference to the manager, so patch it too.
    import webapp.api.pipeline as pipeline_api
    monkeypatch.setattr(pipeline_api, "pipeline_task_manager", isolated_manager)

    # Build the app only after every patch is in place.
    from webapp.server import create_app
    app = create_app()
    return TestClient(app)


def _minimal_pdf_dir(tmp_path: Path) -> Path:
    """Make an empty ``pdfs`` directory under *tmp_path* and return its path.

    No files are written; the directory only has to exist so that it can be
    submitted as a ``docs_path``.
    """
    pdf_dir = tmp_path.joinpath("pdfs")
    pdf_dir.mkdir()
    return pdf_dir


def _mock_build_result(tmp_path: Path, job, run_id="r1"):
    """Build a stand-in DatasetBuildResult holding a single draft sample.

    Files written under *tmp_path*:
      * ``build/<run_id>/source_chunks.jsonl`` with one chunk record,
      * ``build/latest/source_chunks.jsonl`` with the same content,
      * ``generated_dataset.csv`` with a header row and one sample row.

    The other artifact paths on the returned result point into
    ``build/<run_id>``, but this helper does not create those files.
    """
    from rag_eval.dataset_builder.models import (
        DatasetBuildArtifactPaths,
        DatasetBuildResult,
        DraftQuestionSample,
    )

    build_root = tmp_path / "build"
    run_dir = build_root / run_id
    latest_dir = build_root / "latest"
    for directory in (run_dir, latest_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # One chunk record, serialised as a single JSONL line.
    chunk_record = {
        "chunk_id": "c1",
        "doc_id": "d1",
        "doc_name": "test.pdf",
        "text": "CT scan context.",
        "page_start": 1,
        "page_end": 1,
        "section_path": "/",
        "section_title": "",
        "source_layout_ids": [],
    }
    chunks_file = run_dir / "source_chunks.jsonl"
    chunks_file.write_text(json.dumps(chunk_record) + "\n", encoding="utf-8")

    # Mirror the chunk file into the "latest" directory by reading it back.
    mirrored = chunks_file.read_text(encoding="utf-8")
    (latest_dir / "source_chunks.jsonl").write_text(mirrored, encoding="utf-8")

    csv_header = (
        "sample_id,question,ground_truth,scenario,language,doc_id,doc_name,"
        "section_path,page_start,page_end,source_chunk_ids,question_type,difficulty,"
        "review_status,review_notes\n"
    )
    csv_row = (
        's1,"What is CT?","CT is imaging.","test","zh","d1","test.pdf","/",'
        '1,1,"[""c1""]","fact","easy","draft",""\n'
    )
    (tmp_path / "generated_dataset.csv").write_text(csv_header + csv_row, encoding="utf-8")

    sample_fields = {
        "sample_id": "s1",
        "question": "What is CT?",
        "ground_truth": "CT is imaging.",
        "scenario": "test",
        "language": "zh",
        "doc_id": "d1",
        "doc_name": "test.pdf",
        "section_path": "/",
        "page_start": 1,
        "page_end": 1,
        "source_chunk_ids": ["c1"],
        "question_type": "fact",
        "difficulty": "easy",
    }
    draft_sample = DraftQuestionSample(**sample_fields)

    paths = DatasetBuildArtifactPaths(
        root_dir=run_dir,
        documents_jsonl=run_dir / "documents.jsonl",
        semantic_blocks_jsonl=run_dir / "semantic_blocks.jsonl",
        source_chunks_jsonl=chunks_file,
        dataset_draft_csv=run_dir / "dataset_draft.csv",
        parse_failures_csv=run_dir / "parse_failures.csv",
        metadata_json=run_dir / "metadata.json",
    )

    return DatasetBuildResult(
        job=job,
        run_id=run_id,
        artifact_paths=paths,
        documents=[],
        draft_samples=[draft_sample],
        parse_failures=[],
    )


def _mock_eval_result(tmp_path: Path, scenario):
    """Build a stand-in EvaluationResult with no samples and no score rows.

    *tmp_path* is accepted but not used; *scenario* is stored on the result
    unchanged. Run id and timestamps are fixed values.
    """
    from rag_eval.shared.models import EvaluationResult

    fixed_fields = {
        "run_id": "eval-r1",
        "started_at": "2026-01-01T00:00:00",
        "finished_at": "2026-01-01T00:01:00",
        "valid_samples": [],
        "invalid_samples": [],
        "score_rows": [],
    }
    return EvaluationResult(scenario=scenario, **fixed_fields)


# ── API route tests ────────────────────────────────────────────────────────────

def test_submit_returns_202_and_job_id(client, tmp_path):
    """Submitting a job answers 202 with a job_id and the supplied job name."""
    pdf_dir = _minimal_pdf_dir(tmp_path)
    request_body = {
        "docs_path": str(pdf_dir),
        "job_name": "test-job",
    }

    execute_target = "webapp.services.pipeline_task_manager.PipelineTaskManager._execute"
    with patch(execute_target) as fake_execute:
        from webapp.models import PipelineResult

        # Canned result so the job finishes without running a real pipeline.
        fake_execute.return_value = PipelineResult(
            build_artifact_dir="/tmp/b",
            dataset_csv="/tmp/d.csv",
            source_chunks_jsonl="/tmp/c.jsonl",
            total_questions=1,
            parse_failures=0,
            eval_run_id="r1",
            eval_output_dir="/tmp/e",
            scores_csv="/tmp/scores.csv",
            summary_md="/tmp/summary.md",
        )
        resp = client.post("/api/pipeline/jobs", json=request_body)

    assert resp.status_code == 202
    payload = resp.json()
    assert "job_id" in payload
    assert payload["job_name"] == "test-job"
    # The mocked job may already have finished by the time the response is
    # read, so either state is acceptable here.
    assert payload["status"] in ("queued", "completed")


def test_get_nonexistent_job_returns_404(client):
    """Looking up a job id that was never submitted yields HTTP 404."""
    missing = client.get("/api/pipeline/jobs/doesnotexist")
    assert missing.status_code == 404


def test_list_jobs_returns_empty_initially(client):
    """With nothing submitted, the listing is 200 with an empty ``jobs`` list."""
    listing = client.get("/api/pipeline/jobs")
    assert listing.status_code == 200
    body = listing.json()
    assert body["jobs"] == []


def test_job_status_polling(client, tmp_path):
    """Submitted job becomes visible via GET /api/pipeline/jobs/{id}.

    ``PipelineTaskManager._execute`` is patched to return a canned
    ``PipelineResult``. The patch stays active until the job reports a
    terminal status: the job runs in the background, so undoing the patch
    right after the POST could let a late-starting worker call the real
    ``_execute`` (assuming the manager resolves it on the worker — the
    manager is not visible from this file).
    """
    from webapp.models import PipelineResult

    pdf_dir = _minimal_pdf_dir(tmp_path)

    with patch("webapp.services.pipeline_task_manager.PipelineTaskManager._execute") as mock_exec:
        mock_exec.return_value = PipelineResult(
            build_artifact_dir="/tmp/b", dataset_csv="/tmp/d.csv",
            source_chunks_jsonl="/tmp/c.jsonl", total_questions=3,
            parse_failures=0, eval_run_id="r2", eval_output_dir="/tmp/e",
            scores_csv="/tmp/scores.csv", summary_md="/tmp/summary.md",
        )
        post_resp = client.post("/api/pipeline/jobs", json={"docs_path": str(pdf_dir)})
        # Fail with a clear message instead of a KeyError on "job_id" below.
        assert post_resp.status_code == 202
        job_id = post_resp.json()["job_id"]

        # Poll until done or timeout (20 x 0.25s = at most 5s for the mock).
        for _ in range(20):
            status_resp = client.get(f"/api/pipeline/jobs/{job_id}")
            assert status_resp.status_code == 200
            status = status_resp.json()
            if status["status"] in ("completed", "failed"):
                break
            time.sleep(0.25)

    assert status["status"] == "completed"
    assert status["result"]["total_questions"] == 3


def test_job_fails_on_invalid_docs_path(client):
    """Job fails quickly if docs_path does not exist.

    The submission itself is still accepted with 202; the failure is only
    visible when polling the job. The error text must either mention
    ``docs_path`` or contain "not" (case-insensitive).
    """
    resp = client.post("/api/pipeline/jobs", json={
        "docs_path": "/nonexistent/path/that/does/not/exist",
    })
    assert resp.status_code == 202
    job_id = resp.json()["job_id"]

    # Poll until the job reaches a terminal state (20 x 0.25s = at most 5s).
    for _ in range(20):
        status_resp = client.get(f"/api/pipeline/jobs/{job_id}")
        # Same check as the other polling test: surface an HTTP-level problem
        # here rather than as a KeyError on the payload.
        assert status_resp.status_code == 200
        status = status_resp.json()
        if status["status"] in ("completed", "failed"):
            break
        time.sleep(0.25)

    assert status["status"] == "failed"
    # A missing/None error becomes an assertion failure, not a TypeError.
    error_text = status["error"] or ""
    assert "docs_path" in error_text or "not" in error_text.lower()


def test_list_jobs_shows_submitted(client, tmp_path):
    """GET /api/pipeline/jobs includes jobs after submission.

    The short wait and the listing request run while ``_execute`` is still
    patched, so the background job is not left running against the real
    ``_execute`` once the patch has been undone.
    """
    from webapp.models import PipelineResult

    pdf_dir = _minimal_pdf_dir(tmp_path)

    with patch("webapp.services.pipeline_task_manager.PipelineTaskManager._execute") as mock_exec:
        mock_exec.return_value = PipelineResult(
            build_artifact_dir="/tmp/b", dataset_csv="/tmp/d.csv",
            source_chunks_jsonl="/tmp/c.jsonl", total_questions=1,
            parse_failures=0, eval_run_id="r3", eval_output_dir="/tmp/e",
            scores_csv="/tmp/scores.csv", summary_md="/tmp/summary.md",
        )
        client.post("/api/pipeline/jobs", json={"docs_path": str(pdf_dir), "job_name": "listed-job"})

        # Give the background worker a moment while the mock is still in place.
        time.sleep(0.5)
        list_resp = client.get("/api/pipeline/jobs")

    assert list_resp.status_code == 200
    jobs = list_resp.json()["jobs"]
    assert len(jobs) >= 1
    names = [j["job_name"] for j in jobs]
    assert "listed-job" in names


# ── execute_dataset_build_job refactor test ────────────────────────────────────

def test_execute_dataset_build_job_directly(tmp_path):
    """execute_dataset_build_job runs the build without a YAML file.

    The job is constructed in code and handed to the runner together with a
    mocked parser (whose ``parse`` returns a stub document) and a mocked
    generator (whose ``generate`` returns no samples). The test checks that
    the result carries the job name and that the artifact root directory
    exists afterwards.
    """
    from rag_eval.dataset_builder.models import DatasetBuildJob, DatasetBuildRuntime
    from rag_eval.dataset_builder.runner import execute_dataset_build_job
    from rag_eval.settings import EvaluationSettings

    pdf_dir = tmp_path / "pdfs"
    pdf_dir.mkdir()
    # Only a placeholder that matches the "*.pdf" glob; the parser is mocked.
    (pdf_dir / "doc.pdf").write_bytes(b"%PDF-fake")

    job = DatasetBuildJob(
        job_name="direct-test",
        input_path=pdf_dir,
        input_glob="*.pdf",
        parser_provider="aliyun_docmind",
        failure_mode="skip",
        generation_model="test-model",
        output_type="online_question_bank",
        review_mode="draft_with_manual_review",
        max_questions_per_document=5,
        max_source_chunks_per_question=3,
        dataset_path=tmp_path / "out.csv",
        artifact_dir=tmp_path / "artifacts",
        runtime=DatasetBuildRuntime(max_documents=1),
    )

    # Stub parsed document: empty content, plus a plain-dict record.
    mock_doc = MagicMock()
    mock_doc.doc_id = "d1"
    mock_doc.doc_name = "doc.pdf"
    mock_doc.source_chunks = []
    mock_doc.semantic_blocks = []
    mock_doc.raw_text = ""
    mock_doc.structure_nodes = []
    mock_doc.metadata = {}
    mock_doc.to_record.return_value = {
        "doc_id": "d1", "doc_name": "doc.pdf", "raw_text": "",
        "structure_nodes": [], "metadata": {},
        "semantic_block_count": 0, "source_chunk_count": 0,
    }

    mock_parser = MagicMock()
    mock_parser.parse.return_value = mock_doc

    mock_generator = MagicMock()
    mock_generator.generate.return_value = []

    result = execute_dataset_build_job(
        job,
        # NOTE(review): presumably _env_file=None keeps settings from reading
        # a local .env file — confirm against EvaluationSettings.
        settings=EvaluationSettings(_env_file=None),
        parser=mock_parser,
        generator=mock_generator,
    )
    assert result.job.job_name == "direct-test"
    assert result.artifact_paths.root_dir.exists()