Fix some things
This commit is contained in:
127
backend/tests/perception/test_crawlers.py
Normal file
127
backend/tests/perception/test_crawlers.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Unit tests for crawlers — mock httpx responses."""
|
||||
from __future__ import annotations
|
||||
from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
|
||||
from app.infrastructure.perception.crawlers.base import RawEvent, BaseCrawler
|
||||
|
||||
|
||||
def test_raw_event_fields():
    """RawEvent exposes the values it was constructed with."""
    ev = RawEvent(
        source="TEST",
        source_label="Test",
        standard_code="TST-001",
        title="Test",
        summary="Summary",
        full_text_url="https://example.com",
        status="enacted",
        published_at="2026-01-01",
        effective_at=None,
        category="test",
        tags=["a"],
        raw_text="full text here",
    )

    # Spot-check one scalar field and one list field.
    assert ev.source == "TEST"
    assert ev.tags == ["a"]
|
||||
|
||||
|
||||
# Canned HTML listing (one table, two standard rows) served as the mocked
# response text in test_catarc_crawler_parses_html.
CATARC_HTML = """
<html><body>
<table>
<tr>
<td><a href="/std/detail/123">GB 18384-2025</a></td>
<td>电动汽车安全要求</td>
<td>2025-11-15</td>
<td>现行</td>
</tr>
<tr>
<td><a href="/std/detail/456">GB/T 40429-2026</a></td>
<td>汽车驾驶自动化分级</td>
<td>2026-02-01</td>
<td>即将实施</td>
</tr>
</table>
</body></html>
"""
|
||||
|
||||
|
||||
def test_catarc_crawler_parses_html():
    """CatarcCrawler.fetch returns RawEvent objects built from CATARC_HTML."""
    # Imported inside the test so a broken crawler module fails only this test.
    from app.infrastructure.perception.crawlers.catarc_crawler import CatarcCrawler

    mock_resp = MagicMock()
    mock_resp.status_code = 200
    mock_resp.text = CATARC_HTML
    mock_resp.raise_for_status = MagicMock()

    # Patch httpx.get to hand back the canned response instead of a real one.
    with patch("httpx.get", return_value=mock_resp):
        crawler = CatarcCrawler()
        events = crawler.fetch(limit=10)

    assert isinstance(events, list)
    assert len(events) >= 1
    assert all(isinstance(e, RawEvent) for e in events)

    # The first table row's standard code must be among the parsed events.
    codes = [e.standard_code for e in events]
    assert "GB 18384-2025" in codes
|
||||
|
||||
|
||||
# Canned JSON payload (a single row under "rows") returned by the mocked
# response's .json() in test_guobiao_crawler_parses_json.
GUOBIAO_JSON = {
    "rows": [
        {
            "std_code": "GB 18384-2025",
            "std_name": "电动汽车安全要求",
            "release_date": "2025-11-15",
            "implement_date": "2026-07-01",
            "std_status": "现行",
            "std_type": "强制性",
        },
    ]
}
|
||||
|
||||
|
||||
def test_guobiao_crawler_parses_json():
    """GuobiaoMandatoryCrawler.fetch maps the mocked JSON row to an event."""
    # Imported inside the test so a broken crawler module fails only this test.
    from app.infrastructure.perception.crawlers.guobiao_crawler import GuobiaoMandatoryCrawler

    mock_resp = MagicMock()
    mock_resp.status_code = 200
    mock_resp.json.return_value = GUOBIAO_JSON
    mock_resp.raise_for_status = MagicMock()

    # Patch httpx.get to hand back the canned response instead of a real one.
    with patch("httpx.get", return_value=mock_resp):
        crawler = GuobiaoMandatoryCrawler()
        events = crawler.fetch(limit=10)

    assert len(events) >= 1
    assert events[0].source == "国标委"
    assert events[0].standard_code == "GB 18384-2025"
|
||||
|
||||
|
||||
# Canned RSS 2.0 feed with a single <item>, used as the mocked response body
# in test_eurlex_crawler_parses_rss. The XML declaration must stay at the very
# start of the string, so the literal opens on the same line as the quotes.
EURLEX_RSS = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>EUR-Lex</title>
<item>
<title>Regulation (EU) 2024/1689 — AI Act</title>
<link>https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32024R1689</link>
<description>The EU Artificial Intelligence Act enters into force.</description>
<pubDate>Fri, 12 Jul 2024 00:00:00 GMT</pubDate>
</item>
</channel>
</rss>"""
|
||||
|
||||
|
||||
def test_eurlex_crawler_parses_rss():
    """EurlexCrawler.fetch produces events from the mocked EUR-Lex RSS feed."""
    # Imported inside the test so a broken crawler module fails only this test.
    from app.infrastructure.perception.crawlers.eurlex_crawler import EurlexCrawler

    mock_resp = MagicMock()
    mock_resp.status_code = 200
    mock_resp.text = EURLEX_RSS
    # httpx exposes Response.content as bytes, so the mock mirrors that rather
    # than handing the crawler a str it would never see from a real response.
    mock_resp.content = EURLEX_RSS.encode("utf-8")
    mock_resp.raise_for_status = MagicMock()

    # Patch httpx.get to hand back the canned response instead of a real one.
    with patch("httpx.get", return_value=mock_resp):
        crawler = EurlexCrawler()
        events = crawler.fetch(limit=5)

    assert isinstance(events, list)
    assert len(events) >= 1
    assert events[0].source == "EUR-Lex"
|
||||
Reference in New Issue
Block a user