"""Shared utility functions for crawlers.""" from __future__ import annotations import re from datetime import date def parse_date(text: str) -> str: """Return YYYY-MM-DD from common Chinese date formats, or today's date.""" text = text.strip() if not text: return date.today().isoformat() m = re.search(r"(\d{4})[/-](\d{1,2})[/-](\d{1,2})", text) if m: try: return date(int(m.group(1)), int(m.group(2)), int(m.group(3))).isoformat() except ValueError: pass m2 = re.search(r"(\d{4})年(\d{1,2})月(\d{1,2})日?", text) if m2: try: return date(int(m2.group(1)), int(m2.group(2)), int(m2.group(3))).isoformat() except ValueError: pass return date.today().isoformat() def extract_tags(standard_code: str, title: str) -> list[str]: """Derive simple keyword tags from standard code and title.""" tags: list[str] = [] code_upper = standard_code.upper() if "GB" in code_upper: tags.append("国家标准") if "/T" in code_upper: tags.append("推荐性") else: tags.append("强制性") keywords = ["电动", "安全", "自动驾驶", "充电", "智能网联", "碰撞", "排放", "网络安全"] for kw in keywords: if kw in title: tags.append(kw) return tags[:5]