fix some things
This commit is contained in:
0
backend/tests/perception/__init__.py
Normal file
0
backend/tests/perception/__init__.py
Normal file
95
backend/tests/perception/test_base_event_store.py
Normal file
95
backend/tests/perception/test_base_event_store.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Contract tests: any BaseEventStore implementation must pass these."""
|
||||
from app.infrastructure.perception.base_event_store import BaseEventStore
|
||||
from app.infrastructure.perception.mock_event_store import MockEventStore
|
||||
|
||||
|
||||
def _store() -> BaseEventStore:
    """Build the store implementation exercised by the contract tests.

    Every test in this module obtains its store through this factory.
    """
    store: BaseEventStore = MockEventStore()
    return store
|
||||
|
||||
|
||||
def test_is_base_event_store():
    """The store under test is an instance of ``BaseEventStore``."""
    candidate = _store()
    assert isinstance(candidate, BaseEventStore)
|
||||
|
||||
|
||||
def test_all_returns_list():
    """``all()`` returns a list that contains at least one event."""
    events = _store().all()
    assert isinstance(events, list)
    assert len(events) > 0
|
||||
|
||||
|
||||
def test_get_known_id():
    """``get()`` returns the event whose id was taken from ``all()``."""
    store = _store()
    known_id = store.all()[0]["id"]
    fetched = store.get(known_id)
    assert fetched is not None
    assert fetched["id"] == known_id
|
||||
|
||||
|
||||
def test_get_unknown_returns_none():
    """``get()`` yields ``None`` for an id the store does not contain."""
    missing = _store().get("does-not-exist")
    assert missing is None
|
||||
|
||||
|
||||
def test_filter_by_impact():
    """Filtering on ``impact_level="high"`` returns only high-impact events.

    NOTE: an empty result also satisfies this check.
    """
    matches = _store().filter(impact_level="high", limit=100)
    for event in matches:
        assert event["impact_level"] == "high"
|
||||
|
||||
|
||||
def test_filter_limit():
    """``filter(limit=3)`` never returns more than three events."""
    limited = _store().filter(limit=3)
    assert len(limited) <= 3
|
||||
|
||||
|
||||
def test_stats_keys():
    """``stats()`` exposes every summary key this suite expects."""
    expected_keys = ("total", "high_impact", "medium_impact", "recent_90d")
    summary = _store().stats()
    for key in expected_keys:
        assert key in summary, f"missing key: {key}"
|
||||
|
||||
|
||||
def test_upsert_and_get():
    """An event stored with ``upsert()`` can be read back with ``get()``."""
    new_event = {
        # Identity and provenance.
        "id": "test-upsert-001",
        "source": "TEST",
        "source_label": "Test Source",
        "standard_code": "TST-001",
        # Descriptive content.
        "title": "Test Event",
        "summary": "A test event",
        "full_text_url": "https://example.com",
        # Lifecycle and classification.
        "status": "draft",
        "impact_level": "low",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": ["test"],
        # Change-tracking hashes.
        "content_hash": "abc123",
        "previous_hash": None,
    }
    store = _store()
    store.upsert(new_event)

    stored = store.get("test-upsert-001")
    assert stored is not None
    assert stored["title"] == "Test Event"
|
||||
|
||||
|
||||
def test_get_by_standard_code():
    """``get_by_standard_code()`` finds an event by a code taken from ``all()``."""
    store = _store()
    known_code = store.all()[0]["standard_code"]
    found = store.get_by_standard_code(known_code)
    assert found is not None
    assert found["standard_code"] == known_code
|
||||
|
||||
|
||||
def test_upsert_updates_existing():
    """Upserting under an existing id replaces that event's title."""
    store = _store()
    existing = store.all()[0]
    target_id = existing["id"]

    # Same id as the existing event, with new title/summary/hash and the
    # remaining fields carried over from it.
    replacement = {
        "id": target_id,
        "title": "Updated Title",
        "impact_level": existing["impact_level"],
        "standard_code": existing.get("standard_code", ""),
        "source": existing["source"],
        "source_label": existing.get("source_label", ""),
        "summary": "Updated",
        "full_text_url": "",
        "status": existing["status"],
        "published_at": existing.get("published_at", ""),
        "effective_at": None,
        "category": existing.get("category", ""),
        "tags": [],
        "content_hash": "newhash",
        "previous_hash": None,
    }
    store.upsert(replacement)

    refreshed = store.get(target_id)
    assert refreshed is not None
    assert refreshed["title"] == "Updated Title"
|
||||
111
backend/tests/perception/test_crawl_service.py
Normal file
111
backend/tests/perception/test_crawl_service.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""Integration tests for CrawlService."""
|
||||
from __future__ import annotations
|
||||
from unittest.mock import MagicMock
|
||||
import hashlib
|
||||
import pytest
|
||||
|
||||
from app.infrastructure.perception.crawlers.base import RawEvent
|
||||
from app.infrastructure.perception.mock_event_store import MockEventStore
|
||||
|
||||
|
||||
def _make_raw_event(code="TST-001"):
    """Return a ``RawEvent`` fixture whose code and title derive from *code*."""
    return RawEvent(
        source="TEST",
        source_label="Test",
        standard_code=code,
        title=f"Test {code}",
        summary="Summary",
        full_text_url="https://example.com",
        status="enacted",
        published_at="2026-01-01",
        effective_at=None,
        category="test",
        tags=["test"],
        raw_text="full text",
    )
|
||||
|
||||
|
||||
def _make_service(raw_events):
    """Build a ``CrawlService`` wired to mocks and a fresh ``MockEventStore``.

    The single "TEST" crawler returns *raw_events* from ``fetch``; the LLM
    pipeline and the retrieval service are ``MagicMock`` objects with canned
    return values.
    """
    from app.application.perception.crawl_service import CrawlService

    # Canned pipeline answers.
    pipeline = MagicMock()
    pipeline.extract_structure.return_value = {
        "obligations": [],
        "deadlines": [],
        "scope": "test",
        "penalties": None,
        "impact_level": "low",
    }
    pipeline.assess_impact.return_value = []
    pipeline.compute_diff.return_value = {
        "changed_sections": [],
        "change_summary": "No changes.",
    }

    crawler = MagicMock()
    crawler.fetch.return_value = raw_events

    return CrawlService(
        crawlers={"TEST": crawler},
        event_store=MockEventStore(),
        llm_pipeline=pipeline,
        retrieval_service=MagicMock(),
    )
|
||||
|
||||
|
||||
def test_crawl_yields_progress_and_done():
    """A crawl run emits a "done" event.

    NOTE: only the "done" event is asserted here.
    """
    service = _make_service([_make_raw_event("TST-001")])
    emitted = list(service.run_crawl())
    kinds = [item.get("event") for item in emitted]
    assert "done" in kinds
|
||||
|
||||
|
||||
def test_crawl_upserts_to_store():
    """A crawled event can be read back from the store by its standard code."""
    from app.application.perception.crawl_service import CrawlService

    crawler = MagicMock()
    crawler.fetch.return_value = [_make_raw_event("NEW-001")]

    pipeline = MagicMock()
    pipeline.extract_structure.return_value = {
        "obligations": [],
        "deadlines": [],
        "scope": "",
        "penalties": None,
        "impact_level": "medium",
    }
    pipeline.assess_impact.return_value = []
    pipeline.compute_diff.return_value = {
        "changed_sections": [],
        "change_summary": "",
    }

    # The store is built here so it can be inspected after the crawl.
    store = MockEventStore()
    service = CrawlService(
        crawlers={"TEST": crawler},
        event_store=store,
        llm_pipeline=pipeline,
        retrieval_service=MagicMock(),
    )
    # Drain the generator so the crawl runs to completion.
    list(service.run_crawl())

    stored = store.get_by_standard_code("NEW-001")
    assert stored is not None
    assert stored["title"] == "Test NEW-001"
|
||||
|
||||
|
||||
def test_crawl_skips_unchanged_events():
    """An event whose content hash is already stored is not re-processed.

    The store is pre-seeded with an event for "SKIP-001" whose
    ``content_hash`` is the SHA-256 of the raw text the crawler will return.
    After the crawl, the pipeline's ``extract_structure`` must not have been
    called.
    """
    from app.application.perception.crawl_service import CrawlService

    store = MockEventStore()
    raw = _make_raw_event("SKIP-001")
    content_hash = hashlib.sha256(raw.raw_text.encode()).hexdigest()
    store.upsert({
        # Plain literal: the original used an f-string with no placeholders.
        "id": hashlib.sha256("TEST-SKIP-001".encode()).hexdigest()[:12],
        "standard_code": "SKIP-001",
        "source": "TEST",
        "source_label": "Test",
        "title": "Test SKIP-001",
        "summary": "",
        "full_text_url": "",
        "status": "enacted",
        "impact_level": "low",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": [],
        "content_hash": content_hash,
    })

    mock_pipeline = MagicMock()
    mock_crawler = MagicMock()
    mock_crawler.fetch.return_value = [raw]
    svc = CrawlService(
        crawlers={"TEST": mock_crawler},
        event_store=store,
        llm_pipeline=mock_pipeline,
        retrieval_service=MagicMock(),
    )
    # Drain the generator so the crawl runs to completion.
    list(svc.run_crawl())

    mock_pipeline.extract_structure.assert_not_called()
|
||||
127
backend/tests/perception/test_crawlers.py
Normal file
127
backend/tests/perception/test_crawlers.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Unit tests for crawlers — mock httpx responses."""
|
||||
from __future__ import annotations
|
||||
from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
|
||||
from app.infrastructure.perception.crawlers.base import RawEvent, BaseCrawler
|
||||
|
||||
|
||||
def test_raw_event_fields():
    """``RawEvent`` exposes its constructor arguments as attributes."""
    fields = {
        "source": "TEST",
        "source_label": "Test",
        "standard_code": "TST-001",
        "title": "Test",
        "summary": "Summary",
        "full_text_url": "https://example.com",
        "status": "enacted",
        "published_at": "2026-01-01",
        "effective_at": None,
        "category": "test",
        "tags": ["a"],
        "raw_text": "full text here",
    }
    event = RawEvent(**fields)
    assert event.source == "TEST"
    assert event.tags == ["a"]
|
||||
|
||||
|
||||
# HTML fixture served as the fake response body in
# test_catarc_crawler_parses_html: one table with two rows, each linking a
# standard code.
CATARC_HTML = """
<html><body>
<table>
<tr>
<td><a href="/std/detail/123">GB 18384-2025</a></td>
<td>电动汽车安全要求</td>
<td>2025-11-15</td>
<td>现行</td>
</tr>
<tr>
<td><a href="/std/detail/456">GB/T 40429-2026</a></td>
<td>汽车驾驶自动化分级</td>
<td>2026-02-01</td>
<td>即将实施</td>
</tr>
</table>
</body></html>
"""
|
||||
|
||||
|
||||
def test_catarc_crawler_parses_html():
    """``CatarcCrawler.fetch`` turns the HTML fixture into ``RawEvent`` objects."""
    from app.infrastructure.perception.crawlers.catarc_crawler import CatarcCrawler

    # Fake HTTP response carrying the fixture page.
    fake_response = MagicMock(
        status_code=200,
        text=CATARC_HTML,
        raise_for_status=MagicMock(),
    )

    with patch("httpx.get", return_value=fake_response):
        parsed = CatarcCrawler().fetch(limit=10)

    assert isinstance(parsed, list)
    assert len(parsed) >= 1
    for item in parsed:
        assert isinstance(item, RawEvent)
    assert "GB 18384-2025" in [item.standard_code for item in parsed]
|
||||
|
||||
|
||||
# JSON payload returned by the fake response in
# test_guobiao_crawler_parses_json: a single entry under "rows".
GUOBIAO_JSON = {
    "rows": [
        {
            "std_code": "GB 18384-2025",
            "std_name": "电动汽车安全要求",
            "release_date": "2025-11-15",
            "implement_date": "2026-07-01",
            "std_status": "现行",
            "std_type": "强制性",
        },
    ],
}
|
||||
|
||||
|
||||
def test_guobiao_crawler_parses_json():
    """``GuobiaoMandatoryCrawler.fetch`` maps the JSON fixture to events."""
    from app.infrastructure.perception.crawlers.guobiao_crawler import GuobiaoMandatoryCrawler

    # Fake HTTP response whose ``json()`` returns the fixture payload.
    fake_response = MagicMock(status_code=200, raise_for_status=MagicMock())
    fake_response.json.return_value = GUOBIAO_JSON

    with patch("httpx.get", return_value=fake_response):
        parsed = GuobiaoMandatoryCrawler().fetch(limit=10)

    assert len(parsed) >= 1
    head = parsed[0]
    assert head.source == "国标委"
    assert head.standard_code == "GB 18384-2025"
|
||||
|
||||
|
||||
# RSS 2.0 fixture with a single <item>, used as the fake response body in
# test_eurlex_crawler_parses_rss. The XML declaration directly follows the
# opening quotes, so the string starts with it.
EURLEX_RSS = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>EUR-Lex</title>
<item>
<title>Regulation (EU) 2024/1689 — AI Act</title>
<link>https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32024R1689</link>
<description>The EU Artificial Intelligence Act enters into force.</description>
<pubDate>Fri, 12 Jul 2024 00:00:00 GMT</pubDate>
</item>
</channel>
</rss>"""
|
||||
|
||||
|
||||
def test_eurlex_crawler_parses_rss():
    """``EurlexCrawler.fetch`` parses the RSS fixture into events.

    The fake response provides both ``text`` and ``content`` because this
    test does not pin which attribute the crawler reads.
    """
    from app.infrastructure.perception.crawlers.eurlex_crawler import EurlexCrawler

    mock_resp = MagicMock()
    mock_resp.status_code = 200
    mock_resp.text = EURLEX_RSS
    # httpx exposes ``Response.content`` as bytes and ``Response.text`` as
    # str; mirror that so the mock has the same types as a real response.
    mock_resp.content = EURLEX_RSS.encode("utf-8")
    mock_resp.raise_for_status = MagicMock()

    with patch("httpx.get", return_value=mock_resp):
        crawler = EurlexCrawler()
        events = crawler.fetch(limit=5)

    assert isinstance(events, list)
    assert len(events) >= 1
    assert events[0].source == "EUR-Lex"
|
||||
77
backend/tests/perception/test_llm_pipeline.py
Normal file
77
backend/tests/perception/test_llm_pipeline.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Unit tests for LlmPipeline — mock LLM client and embedding provider."""
|
||||
from __future__ import annotations
|
||||
from unittest.mock import MagicMock, patch
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_pipeline():
    """Return ``(pipeline, llm_client_mock, embedding_mock)``.

    ``LlmPipeline`` is constructed while ``get_llm_client`` and
    ``OpenAICompatibleEmbeddingProvider`` are patched inside the
    ``llm_pipeline`` module, so those calls return the two mocks handed back
    here.
    """
    # Fake LLM client with a canned structured-extraction reply.
    llm_client = MagicMock()
    llm_client.chat.return_value = MagicMock(content='{"obligations":[{"text":"test obligation","deontic":"must","subject":"OEM","object":"system","condition":""}],"deadlines":[{"date":"2026-07-01","description":"实施截止"}],"scope":"适用于M1类车辆","penalties":"罚款","impact_level":"high"}')

    # Fake embedding provider returning two fixed 1024-dimensional vectors.
    embedder = MagicMock()
    embedder.embed_texts.return_value = [[0.1] * 1024, [0.9] * 1024]

    with patch("app.infrastructure.perception.llm_pipeline.get_llm_client", return_value=llm_client):
        with patch("app.infrastructure.perception.llm_pipeline.OpenAICompatibleEmbeddingProvider", return_value=embedder):
            from app.infrastructure.perception.llm_pipeline import LlmPipeline
            return LlmPipeline(), llm_client, embedder
|
||||
|
||||
|
||||
def test_extract_structure_returns_dict():
    """``extract_structure`` returns a dict with obligations and impact level."""
    pipeline, _client, _embedder = _make_pipeline()
    payload = {
        "id": "evt-001",
        "standard_code": "GB 18384-2025",
        "title": "电动汽车安全要求",
        "summary": "新增 IP67 级别防护",
        "source_label": "CATARC",
        "tags": ["电池安全"],
    }
    structured = pipeline.extract_structure(payload)
    assert isinstance(structured, dict)
    for key in ("obligations", "impact_level"):
        assert key in structured
|
||||
|
||||
|
||||
def test_assess_impact_returns_list():
    """``assess_impact`` returns a list given one retrieved chunk."""
    pipeline, llm_client, _embedder = _make_pipeline()
    llm_client.chat.return_value = MagicMock(content='[{"doc_id":"d1","doc_name":"Safety Manual","score":0.85,"key_clauses":"§4.2","recommendation":"更新第4章"}]')

    # Retrieval service that always returns one matching chunk.
    hit = MagicMock(
        doc_id="d1",
        doc_title="Safety Manual",
        score=0.85,
        text="relevant text",
        section_title="§4.2",
    )
    retrieval = MagicMock()
    retrieval.retrieve.return_value = [hit]

    payload = {
        "standard_code": "GB 18384-2025",
        "title": "电动汽车安全要求",
        "obligations": [{"text": "OEM shall comply"}],
    }
    impacted = pipeline.assess_impact(payload, retrieval)
    assert isinstance(impacted, list)
|
||||
|
||||
|
||||
def test_compute_diff_no_change():
    """``compute_diff`` on identical texts returns both expected keys."""
    pipeline, _client, embedder = _make_pipeline()
    # Both texts embed to the same vector.
    embedder.embed_texts.return_value = [[0.5] * 1024, [0.5] * 1024]

    diff = pipeline.compute_diff("paragraph one", "paragraph one")

    assert isinstance(diff, dict)
    for key in ("changed_sections", "change_summary"):
        assert key in diff
|
||||
|
||||
|
||||
def test_compute_diff_detects_change():
    """``compute_diff`` returns a list of changed sections for differing texts."""
    pipeline, llm_client, embedder = _make_pipeline()
    # Two vectors with their single non-zero component in different positions.
    old_vector = [1.0] + [0.0] * 1023
    new_vector = [0.0] + [1.0] + [0.0] * 1022
    embedder.embed_texts.return_value = [old_vector, new_vector]
    llm_client.chat.return_value = MagicMock(content='{"change_type":"tightened","summary":"Requirement tightened"}')

    diff = pipeline.compute_diff("old paragraph text", "new tighter requirement text")

    assert isinstance(diff["changed_sections"], list)
|
||||
98
backend/tests/perception/test_postgres_event_store.py
Normal file
98
backend/tests/perception/test_postgres_event_store.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""Unit tests for PostgresEventStore using a mocked psycopg2 pool."""
|
||||
from __future__ import annotations
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
|
||||
# Patch psycopg2 before importing the module under test.
# ``setdefault`` only registers a stand-in when ``sys.modules`` has no entry
# for that name yet; an already-imported module is left untouched.
import sys

mock_psycopg2 = MagicMock()
mock_psycopg2.extras = MagicMock()
# Use one object for both the ``pool`` attribute and the ``psycopg2.pool``
# module entry (as is already done for ``extras``), so attribute access on
# the package and a ``sys.modules`` lookup resolve to the same mock.
mock_psycopg2.pool = MagicMock()
sys.modules.setdefault("psycopg2", mock_psycopg2)
sys.modules.setdefault("psycopg2.extras", mock_psycopg2.extras)
sys.modules.setdefault("psycopg2.pool", mock_psycopg2.pool)
|
||||
|
||||
from app.infrastructure.perception.base_event_store import BaseEventStore
|
||||
|
||||
|
||||
# Row fixture returned by the mocked cursor in the tests of this module.
SAMPLE_ROW = {
    # Identity and provenance.
    "id": "pg-001",
    "source": "国标委",
    "source_label": "国家标准化管理委员会",
    "standard_code": "GB 18384-2025",
    # Descriptive content.
    "title": "电动汽车安全要求",
    "summary": "新增要求",
    "full_text_url": "https://openstd.samr.gov.cn",
    # Lifecycle and classification.
    "status": "enacted",
    "impact_level": "high",
    "published_at": "2025-11-15",
    "effective_at": "2026-07-01",
    "category": "电动汽车安全",
    "tags": ["电池安全"],
    # Structured-extraction fields, unset in this fixture.
    "obligations": None,
    "deadlines": None,
    "scope": None,
    "penalties": None,
    # Hash and change fields.
    "content_hash": "abc123",
    "previous_hash": None,
    "change_summary": None,
    "changed_sections": None,
    "affected_docs": None,
    # Crawl/processing fields.
    "crawled_at": "2026-06-05T10:00:00+00:00",
    "processed_at": None,
    "raw_storage_key": None,
}
|
||||
|
||||
|
||||
def _make_store_with_pool(mock_pool):
    """Construct a ``PostgresEventStore`` with its pool and schema setup patched.

    ``ThreadedConnectionPool`` is patched to return *mock_pool* and
    ``PostgresEventStore._ensure_schema`` is replaced by a mock while the
    store is instantiated.
    """
    pool_patch = patch("psycopg2.pool.ThreadedConnectionPool", return_value=mock_pool)
    schema_patch = patch(
        "app.infrastructure.perception.postgres_event_store.PostgresEventStore._ensure_schema"
    )
    with pool_patch:
        with schema_patch:
            from app.infrastructure.perception.postgres_event_store import PostgresEventStore
            return PostgresEventStore()
|
||||
|
||||
|
||||
def _cursor_returning(rows):
|
||||
cursor = MagicMock()
|
||||
cursor.__enter__ = lambda s: s
|
||||
cursor.__exit__ = MagicMock(return_value=False)
|
||||
cursor.fetchall.return_value = rows
|
||||
cursor.fetchone.return_value = rows[0] if rows else None
|
||||
return cursor
|
||||
|
||||
|
||||
def test_is_base_event_store():
    """``PostgresEventStore`` is a ``BaseEventStore``."""
    store = _make_store_with_pool(MagicMock())
    assert isinstance(store, BaseEventStore)
|
||||
|
||||
|
||||
def test_filter_returns_list():
    """``filter()`` returns a list when the cursor yields one sample row."""
    # Connection usable as a context manager, handing out the canned cursor.
    connection = MagicMock()
    connection.__enter__.return_value = connection
    connection.__exit__.return_value = False
    connection.cursor.return_value = _cursor_returning([SAMPLE_ROW])

    pool = MagicMock()
    pool.getconn.return_value = connection

    rows = _make_store_with_pool(pool).filter(limit=10)
    assert isinstance(rows, list)
|
||||
|
||||
|
||||
def test_stats_returns_correct_keys():
    """``stats()`` exposes every summary key this test expects."""
    # Cursor whose ``fetchone()`` always reports a count of 5.
    count_cursor = MagicMock()
    count_cursor.__enter__.return_value = count_cursor
    count_cursor.__exit__.return_value = False
    count_cursor.fetchone.return_value = {"count": 5}

    connection = MagicMock()
    connection.__enter__.return_value = connection
    connection.__exit__.return_value = False
    connection.cursor.return_value = count_cursor

    pool = MagicMock()
    pool.getconn.return_value = connection

    summary = _make_store_with_pool(pool).stats()
    expected_keys = ("total", "high_impact", "medium_impact", "recent_90d")
    for key in expected_keys:
        assert key in summary
|
||||
Reference in New Issue
Block a user