"""Unit tests for crawlers — mock httpx responses.""" from __future__ import annotations from unittest.mock import MagicMock, patch import pytest from app.infrastructure.perception.crawlers.base import RawEvent, BaseCrawler def test_raw_event_fields(): ev = RawEvent( source="TEST", source_label="Test", standard_code="TST-001", title="Test", summary="Summary", full_text_url="https://example.com", status="enacted", published_at="2026-01-01", effective_at=None, category="test", tags=["a"], raw_text="full text here", ) assert ev.source == "TEST" assert ev.tags == ["a"] CATARC_HTML = """
GB 18384-2025 电动汽车安全要求 2025-11-15 现行
GB/T 40429-2026 汽车驾驶自动化分级 2026-02-01 即将实施
""" def test_catarc_crawler_parses_html(): from app.infrastructure.perception.crawlers.catarc_crawler import CatarcCrawler mock_resp = MagicMock() mock_resp.status_code = 200 mock_resp.text = CATARC_HTML mock_resp.raise_for_status = MagicMock() with patch("httpx.get", return_value=mock_resp): crawler = CatarcCrawler() events = crawler.fetch(limit=10) assert isinstance(events, list) assert len(events) >= 1 assert all(isinstance(e, RawEvent) for e in events) codes = [e.standard_code for e in events] assert "GB 18384-2025" in codes GUOBIAO_JSON = { "rows": [ { "std_code": "GB 18384-2025", "std_name": "电动汽车安全要求", "release_date": "2025-11-15", "implement_date": "2026-07-01", "std_status": "现行", "std_type": "强制性", }, ] } def test_guobiao_crawler_parses_json(): from app.infrastructure.perception.crawlers.guobiao_crawler import GuobiaoMandatoryCrawler mock_resp = MagicMock() mock_resp.status_code = 200 mock_resp.json.return_value = GUOBIAO_JSON mock_resp.raise_for_status = MagicMock() with patch("httpx.get", return_value=mock_resp): crawler = GuobiaoMandatoryCrawler() events = crawler.fetch(limit=10) assert len(events) >= 1 assert events[0].source == "国标委" assert events[0].standard_code == "GB 18384-2025" EURLEX_RSS = """ EUR-Lex Regulation (EU) 2024/1689 — AI Act https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32024R1689 The EU Artificial Intelligence Act enters into force. Fri, 12 Jul 2024 00:00:00 GMT """ def test_eurlex_crawler_parses_rss(): from app.infrastructure.perception.crawlers.eurlex_crawler import EurlexCrawler mock_resp = MagicMock() mock_resp.status_code = 200 mock_resp.text = EURLEX_RSS mock_resp.content = EURLEX_RSS mock_resp.raise_for_status = MagicMock() with patch("httpx.get", return_value=mock_resp): crawler = EurlexCrawler() events = crawler.fetch(limit=5) assert isinstance(events, list) assert len(events) >= 1 assert events[0].source == "EUR-Lex"