Fix some things

This commit is contained in:
2026-06-08 11:16:28 +08:00
parent 9fea9c6a53
commit e7963b267e
34 changed files with 5195 additions and 246 deletions

View File

View File

@@ -0,0 +1,95 @@
"""Contract tests: any BaseEventStore implementation must pass these."""
from app.infrastructure.perception.base_event_store import BaseEventStore
from app.infrastructure.perception.mock_event_store import MockEventStore
def _store() -> BaseEventStore:
    """Return a brand-new ``MockEventStore`` as the store under test."""
    store_under_test = MockEventStore()
    return store_under_test
def test_is_base_event_store():
    """The object built by ``_store`` is a ``BaseEventStore`` instance."""
    candidate = _store()
    assert isinstance(candidate, BaseEventStore)
def test_all_returns_list():
    """``all()`` hands back a list holding at least one event."""
    events = _store().all()
    assert isinstance(events, list)
    assert len(events) >= 1
def test_get_known_id():
    """Looking up the id of the first listed event returns that same event id."""
    backend = _store()
    known_id = backend.all()[0]["id"]
    fetched = backend.get(known_id)
    assert fetched is not None
    assert fetched["id"] == known_id
def test_get_unknown_returns_none():
    """``get`` with an id that is not in the store yields ``None``."""
    missing = _store().get("does-not-exist")
    assert missing is None
def test_filter_by_impact():
    """Every event returned for ``impact_level="high"`` carries that level.

    Note: this passes trivially when the filter returns no events at all.
    """
    high_events = _store().filter(impact_level="high", limit=100)
    for event in high_events:
        assert event["impact_level"] == "high"
def test_filter_limit():
    """``filter(limit=3)`` never returns more than three events."""
    limited = _store().filter(limit=3)
    assert len(limited) <= 3
def test_stats_keys():
    """``stats()`` exposes each of the four expected summary keys."""
    expected_keys = ("total", "high_impact", "medium_impact", "recent_90d")
    summary = _store().stats()
    for key in expected_keys:
        assert key in summary, f"missing key: {key}"
def test_upsert_and_get():
    """An event written with ``upsert`` can be read back by its id."""
    new_event = {
        "id": "test-upsert-001",
        "source": "TEST",
        "source_label": "Test Source",
        "standard_code": "TST-001",
        "title": "Test Event",
        "summary": "A test event",
        "full_text_url": "https://example.com",
        "status": "draft",
        "impact_level": "low",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": ["test"],
        "content_hash": "abc123",
        "previous_hash": None,
    }
    backend = _store()
    backend.upsert(new_event)

    stored = backend.get("test-upsert-001")
    assert stored is not None
    assert stored["title"] == "Test Event"
def test_get_by_standard_code():
    """Looking up the first event's standard code returns an event with that code."""
    backend = _store()
    code = backend.all()[0]["standard_code"]
    match = backend.get_by_standard_code(code)
    assert match is not None
    assert match["standard_code"] == code
def test_upsert_updates_existing():
    """Upserting under an existing id replaces the stored title."""
    backend = _store()
    existing = backend.all()[0]
    original_id = existing["id"]

    # Re-submit the first event under the same id with a new title and summary.
    replacement = {
        "id": original_id,
        "title": "Updated Title",
        "impact_level": existing["impact_level"],
        "standard_code": existing.get("standard_code", ""),
        "source": existing["source"],
        "source_label": existing.get("source_label", ""),
        "summary": "Updated",
        "full_text_url": "",
        "status": existing["status"],
        "published_at": existing.get("published_at", ""),
        "effective_at": None,
        "category": existing.get("category", ""),
        "tags": [],
        "content_hash": "newhash",
        "previous_hash": None,
    }
    backend.upsert(replacement)

    reloaded = backend.get(original_id)
    assert reloaded is not None
    assert reloaded["title"] == "Updated Title"

View File

@@ -0,0 +1,111 @@
"""Integration tests for CrawlService."""
from __future__ import annotations
from unittest.mock import MagicMock
import hashlib
import pytest
from app.infrastructure.perception.crawlers.base import RawEvent
from app.infrastructure.perception.mock_event_store import MockEventStore
def _make_raw_event(code="TST-001"):
    """Build a ``RawEvent`` fixture whose standard code and title derive from *code*."""
    fields = {
        "source": "TEST",
        "source_label": "Test",
        "standard_code": code,
        "title": f"Test {code}",
        "summary": "Summary",
        "full_text_url": "https://example.com",
        "status": "enacted",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": ["test"],
        "raw_text": "full text",
    }
    return RawEvent(**fields)
def _make_service(raw_events):
    """Assemble a ``CrawlService`` wired entirely to mocks.

    The single "TEST" crawler returns *raw_events* from ``fetch``; the LLM
    pipeline mock returns canned structure / impact / diff payloads; events
    land in a fresh ``MockEventStore``.
    """
    from app.application.perception.crawl_service import CrawlService

    structure_payload = {
        "obligations": [], "deadlines": [], "scope": "test",
        "penalties": None, "impact_level": "low",
    }
    diff_payload = {
        "changed_sections": [], "change_summary": "No changes.",
    }

    crawler = MagicMock()
    crawler.fetch.return_value = raw_events

    pipeline = MagicMock()
    pipeline.extract_structure.return_value = structure_payload
    pipeline.assess_impact.return_value = []
    pipeline.compute_diff.return_value = diff_payload

    retrieval = MagicMock()
    event_store = MockEventStore()
    return CrawlService(
        crawlers={"TEST": crawler},
        event_store=event_store,
        llm_pipeline=pipeline,
        retrieval_service=retrieval,
    )
def test_crawl_yields_progress_and_done():
    """Draining ``run_crawl`` produces at least one item whose "event" is "done"."""
    service = _make_service([_make_raw_event("TST-001")])
    emitted = list(service.run_crawl())
    kinds = [item.get("event") for item in emitted]
    assert "done" in kinds
def test_crawl_upserts_to_store():
    """After a crawl, the fetched event can be found in the store by standard code."""
    from app.application.perception.crawl_service import CrawlService

    event_store = MockEventStore()

    crawler = MagicMock()
    crawler.fetch.return_value = [_make_raw_event("NEW-001")]

    pipeline = MagicMock()
    pipeline.extract_structure.return_value = {
        "obligations": [], "deadlines": [], "scope": "",
        "penalties": None, "impact_level": "medium",
    }
    pipeline.assess_impact.return_value = []
    pipeline.compute_diff.return_value = {
        "changed_sections": [], "change_summary": "",
    }

    service = CrawlService(
        crawlers={"TEST": crawler},
        event_store=event_store,
        llm_pipeline=pipeline,
        retrieval_service=MagicMock(),
    )
    # run_crawl is consumed via list(), so drain it fully before inspecting the store.
    list(service.run_crawl())

    stored = event_store.get_by_standard_code("NEW-001")
    assert stored is not None
    assert stored["title"] == "Test NEW-001"
def test_crawl_skips_unchanged_events():
    """An event whose content hash is already stored must not reach the LLM pipeline.

    The store is pre-seeded with a record for "SKIP-001" whose ``content_hash``
    is the SHA-256 of the raw event's text; the crawler then returns that same
    raw event. The test asserts the crawler was actually consulted (so the
    negative assertion cannot pass vacuously) and that ``extract_structure``
    was never invoked.
    """
    from app.application.perception.crawl_service import CrawlService

    store = MockEventStore()
    raw = _make_raw_event("SKIP-001")
    content_hash = hashlib.sha256(raw.raw_text.encode()).hexdigest()
    store.upsert({
        # presumably mirrors the id scheme CrawlService derives — verify against the service
        "id": hashlib.sha256("TEST-SKIP-001".encode()).hexdigest()[:12],
        "standard_code": "SKIP-001",
        "source": "TEST",
        "source_label": "Test",
        "title": "Test SKIP-001",
        "summary": "",
        "full_text_url": "",
        "status": "enacted",
        "impact_level": "low",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": [],
        "content_hash": content_hash,
    })

    mock_pipeline = MagicMock()
    mock_crawler = MagicMock()
    mock_crawler.fetch.return_value = [raw]
    svc = CrawlService(
        crawlers={"TEST": mock_crawler},
        event_store=store,
        llm_pipeline=mock_pipeline,
        retrieval_service=MagicMock(),
    )
    list(svc.run_crawl())

    # Guard against a vacuous pass: the crawl must have fetched the event.
    mock_crawler.fetch.assert_called()
    mock_pipeline.extract_structure.assert_not_called()

View File

@@ -0,0 +1,127 @@
"""Unit tests for crawlers — mock httpx responses."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from app.infrastructure.perception.crawlers.base import RawEvent, BaseCrawler
def test_raw_event_fields():
    """A ``RawEvent`` built from keyword arguments exposes them as attributes."""
    fields = {
        "source": "TEST",
        "source_label": "Test",
        "standard_code": "TST-001",
        "title": "Test",
        "summary": "Summary",
        "full_text_url": "https://example.com",
        "status": "enacted",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": ["a"],
        "raw_text": "full text here",
    }
    event = RawEvent(**fields)
    assert event.source == "TEST"
    assert event.tags == ["a"]
# HTML fixture served to CatarcCrawler: one table with two rows. The first cell
# of each row links to a detail page and carries the standard code; the other
# cells are presumably title, date and status — confirm against the parser.
CATARC_HTML = """
<html><body>
<table>
<tr>
<td><a href="/std/detail/123">GB 18384-2025</a></td>
<td>电动汽车安全要求</td>
<td>2025-11-15</td>
<td>现行</td>
</tr>
<tr>
<td><a href="/std/detail/456">GB/T 40429-2026</a></td>
<td>汽车驾驶自动化分级</td>
<td>2026-02-01</td>
<td>即将实施</td>
</tr>
</table>
</body></html>
"""
def test_catarc_crawler_parses_html():
    """With ``httpx.get`` patched to serve ``CATARC_HTML``, the crawler yields RawEvents.

    At least one event must come back, every item must be a ``RawEvent``, and
    the code "GB 18384-2025" must be among the parsed standard codes.
    """
    from app.infrastructure.perception.crawlers.catarc_crawler import CatarcCrawler

    response = MagicMock()
    response.status_code = 200
    response.text = CATARC_HTML
    response.raise_for_status = MagicMock()

    with patch("httpx.get", return_value=response):
        parsed = CatarcCrawler().fetch(limit=10)

    assert isinstance(parsed, list)
    assert len(parsed) >= 1
    for item in parsed:
        assert isinstance(item, RawEvent)
    assert "GB 18384-2025" in [item.standard_code for item in parsed]
# JSON payload fixture for the Guobiao crawler test: a single row under "rows".
GUOBIAO_JSON = {
    "rows": [
        {
            "std_code": "GB 18384-2025",
            "std_name": "电动汽车安全要求",
            "release_date": "2025-11-15",
            "implement_date": "2026-07-01",
            "std_status": "现行",
            "std_type": "强制性",
        },
    ],
}
def test_guobiao_crawler_parses_json():
    """With ``httpx.get`` patched to return ``GUOBIAO_JSON``, the first event matches the row."""
    from app.infrastructure.perception.crawlers.guobiao_crawler import GuobiaoMandatoryCrawler

    response = MagicMock()
    response.status_code = 200
    response.json.return_value = GUOBIAO_JSON
    response.raise_for_status = MagicMock()

    with patch("httpx.get", return_value=response):
        parsed = GuobiaoMandatoryCrawler().fetch(limit=10)

    assert len(parsed) >= 1
    head = parsed[0]
    assert head.source == "国标委"
    assert head.standard_code == "GB 18384-2025"
# RSS 2.0 fixture served to EurlexCrawler: one channel containing a single item
# with title, link, description and pubDate elements.
EURLEX_RSS = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>EUR-Lex</title>
<item>
<title>Regulation (EU) 2024/1689 — AI Act</title>
<link>https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32024R1689</link>
<description>The EU Artificial Intelligence Act enters into force.</description>
<pubDate>Fri, 12 Jul 2024 00:00:00 GMT</pubDate>
</item>
</channel>
</rss>"""
def test_eurlex_crawler_parses_rss():
    """With ``httpx.get`` patched to serve ``EURLEX_RSS``, the crawler yields EUR-Lex events.

    The mocked response mirrors the httpx contract: ``text`` is the decoded
    ``str`` and ``content`` is the raw ``bytes`` body, so whichever attribute
    the crawler reads has the type it would have in production.
    """
    from app.infrastructure.perception.crawlers.eurlex_crawler import EurlexCrawler

    mock_resp = MagicMock()
    mock_resp.status_code = 200
    mock_resp.text = EURLEX_RSS
    # httpx exposes ``content`` as bytes; a str here would hide bytes-handling bugs.
    mock_resp.content = EURLEX_RSS.encode("utf-8")
    mock_resp.raise_for_status = MagicMock()

    with patch("httpx.get", return_value=mock_resp):
        crawler = EurlexCrawler()
        events = crawler.fetch(limit=5)

    assert isinstance(events, list)
    assert len(events) >= 1
    assert events[0].source == "EUR-Lex"

View File

@@ -0,0 +1,77 @@
"""Unit tests for LlmPipeline — mock LLM client and embedding provider."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import json
import pytest
def _make_pipeline():
    """Construct an ``LlmPipeline`` with its LLM client factory and embedding provider patched.

    Returns a ``(pipeline, mock_client, mock_embedder)`` tuple. The client's
    ``chat`` returns a canned JSON structure payload and the embedder's
    ``embed_texts`` returns two 1024-dimensional vectors. The patches are only
    active while the pipeline is being constructed.
    """
    canned_structure = (
        '{"obligations":[{"text":"test obligation","deontic":"must",'
        '"subject":"OEM","object":"system","condition":""}],'
        '"deadlines":[{"date":"2026-07-01","description":"实施截止"}],'
        '"scope":"适用于M1类车辆","penalties":"罚款","impact_level":"high"}'
    )

    client = MagicMock()
    client.chat.return_value = MagicMock(content=canned_structure)

    embedder = MagicMock()
    embedder.embed_texts.return_value = [[0.1] * 1024, [0.9] * 1024]

    with patch("app.infrastructure.perception.llm_pipeline.get_llm_client") as llm_factory:
        with patch("app.infrastructure.perception.llm_pipeline.OpenAICompatibleEmbeddingProvider") as embedder_cls:
            llm_factory.return_value = client
            embedder_cls.return_value = embedder
            from app.infrastructure.perception.llm_pipeline import LlmPipeline
            return LlmPipeline(), client, embedder
def test_extract_structure_returns_dict():
    """``extract_structure`` returns a dict containing obligations and an impact level."""
    pipeline, _client, _embedder = _make_pipeline()
    sample_event = {
        "id": "evt-001",
        "standard_code": "GB 18384-2025",
        "title": "电动汽车安全要求",
        "summary": "新增 IP67 级别防护",
        "source_label": "CATARC",
        "tags": ["电池安全"],
    }
    structure = pipeline.extract_structure(sample_event)
    assert isinstance(structure, dict)
    for required in ("obligations", "impact_level"):
        assert required in structure
def test_assess_impact_returns_list():
    """``assess_impact`` returns a list when retrieval yields one chunk and the LLM a JSON array."""
    pipeline, client, _embedder = _make_pipeline()
    llm_reply = '[{"doc_id":"d1","doc_name":"Safety Manual","score":0.85,"key_clauses":"§4.2","recommendation":"更新第4章"}]'
    client.chat.return_value = MagicMock(content=llm_reply)

    # One retrieved chunk carrying the attributes set by the original fixture.
    retrieved = MagicMock(
        doc_id="d1",
        doc_title="Safety Manual",
        score=0.85,
        text="relevant text",
        section_title="§4.2",
    )
    retrieval = MagicMock()
    retrieval.retrieve.return_value = [retrieved]

    sample_event = {
        "standard_code": "GB 18384-2025",
        "title": "电动汽车安全要求",
        "obligations": [{"text": "OEM shall comply"}],
    }
    assessment = pipeline.assess_impact(sample_event, retrieval)
    assert isinstance(assessment, list)
def test_compute_diff_no_change():
    """With identical texts and identical embeddings, ``compute_diff`` returns the expected keys."""
    pipeline, _client, embedder = _make_pipeline()
    same_vector = [0.5] * 1024
    embedder.embed_texts.return_value = [list(same_vector), list(same_vector)]

    diff = pipeline.compute_diff("paragraph one", "paragraph one")
    assert isinstance(diff, dict)
    for required in ("changed_sections", "change_summary"):
        assert required in diff
def test_compute_diff_detects_change():
    """With two different unit-vector embeddings, ``changed_sections`` comes back as a list."""
    pipeline, client, embedder = _make_pipeline()

    # The two embeddings are non-zero in different positions (index 0 vs index 1).
    old_vector = [1.0] + [0.0] * 1023
    new_vector = [0.0] + [1.0] + [0.0] * 1022
    embedder.embed_texts.return_value = [old_vector, new_vector]
    client.chat.return_value = MagicMock(
        content='{"change_type":"tightened","summary":"Requirement tightened"}'
    )

    diff = pipeline.compute_diff("old paragraph text", "new tighter requirement text")
    assert isinstance(diff["changed_sections"], list)

View File

@@ -0,0 +1,98 @@
"""Unit tests for PostgresEventStore using a mocked psycopg2 pool."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import pytest
# Register stand-ins for psycopg2 before importing the module under test.
# Each submodule stub is the SAME object as the matching attribute on the
# parent stub, so ``sys.modules["psycopg2.pool"]`` and ``psycopg2.pool``
# resolve identically and ``patch("psycopg2.pool....")`` hits the object the
# code under test will see. ``setdefault`` leaves an already-imported psycopg2
# untouched.
import sys
mock_psycopg2 = MagicMock()
mock_psycopg2.extras = MagicMock()
mock_psycopg2.pool = MagicMock()
sys.modules.setdefault("psycopg2", mock_psycopg2)
sys.modules.setdefault("psycopg2.extras", mock_psycopg2.extras)
sys.modules.setdefault("psycopg2.pool", mock_psycopg2.pool)
from app.infrastructure.perception.base_event_store import BaseEventStore
# Row fixture handed to the mocked cursor in the PostgresEventStore tests.
SAMPLE_ROW = {
    # identity and source
    "id": "pg-001",
    "source": "国标委",
    "source_label": "国家标准化管理委员会",
    "standard_code": "GB 18384-2025",
    # descriptive fields
    "title": "电动汽车安全要求",
    "summary": "新增要求",
    "full_text_url": "https://openstd.samr.gov.cn",
    "status": "enacted",
    "impact_level": "high",
    "published_at": "2025-11-15",
    "effective_at": "2026-07-01",
    "category": "电动汽车安全",
    "tags": ["电池安全"],
    # fields left unset in this fixture
    "obligations": None,
    "deadlines": None,
    "scope": None,
    "penalties": None,
    # hashes and change tracking
    "content_hash": "abc123",
    "previous_hash": None,
    "change_summary": None,
    "changed_sections": None,
    "affected_docs": None,
    # bookkeeping
    "crawled_at": "2026-06-05T10:00:00+00:00",
    "processed_at": None,
    "raw_storage_key": None,
}
def _make_store_with_pool(mock_pool):
    """Instantiate ``PostgresEventStore`` with the connection pool and schema setup patched.

    ``psycopg2.pool.ThreadedConnectionPool`` is patched to return *mock_pool*
    and ``PostgresEventStore._ensure_schema`` is replaced by a mock while the
    store is being constructed.
    """
    schema_target = (
        "app.infrastructure.perception.postgres_event_store.PostgresEventStore._ensure_schema"
    )
    with patch("psycopg2.pool.ThreadedConnectionPool", return_value=mock_pool), \
            patch(schema_target):
        from app.infrastructure.perception.postgres_event_store import PostgresEventStore
        return PostgresEventStore()
def _cursor_returning(rows):
cursor = MagicMock()
cursor.__enter__ = lambda s: s
cursor.__exit__ = MagicMock(return_value=False)
cursor.fetchall.return_value = rows
cursor.fetchone.return_value = rows[0] if rows else None
return cursor
def test_is_base_event_store():
    """A ``PostgresEventStore`` built on a mock pool is a ``BaseEventStore`` instance."""
    pg_store = _make_store_with_pool(MagicMock())
    assert isinstance(pg_store, BaseEventStore)
def test_filter_returns_list():
    """``filter`` returns a list when the pooled connection's cursor serves one sample row."""
    def _enter(self):
        return self

    connection = MagicMock()
    connection.__enter__ = _enter
    connection.__exit__ = MagicMock(return_value=False)
    connection.cursor.return_value = _cursor_returning([SAMPLE_ROW])

    pool = MagicMock()
    pool.getconn.return_value = connection

    pg_store = _make_store_with_pool(pool)
    rows = pg_store.filter(limit=10)
    assert isinstance(rows, list)
def test_stats_returns_correct_keys():
    """``stats`` exposes all four summary keys when every ``fetchone`` returns a count row."""
    def _enter(self):
        return self

    count_cursor = MagicMock()
    count_cursor.__enter__ = _enter
    count_cursor.__exit__ = MagicMock(return_value=False)
    count_cursor.fetchone.return_value = {"count": 5}

    connection = MagicMock()
    connection.__enter__ = _enter
    connection.__exit__ = MagicMock(return_value=False)
    connection.cursor.return_value = count_cursor

    pool = MagicMock()
    pool.getconn.return_value = connection

    summary = _make_store_with_pool(pool).stats()
    expected_keys = ("total", "high_impact", "medium_impact", "recent_90d")
    for key in expected_keys:
        assert key in summary