Fix some things
This commit is contained in:
32
backend/app/infrastructure/perception/crawlers/base.py
Normal file
32
backend/app/infrastructure/perception/crawlers/base.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Shared contracts for regulatory source crawlers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class RawEvent:
    """Raw regulatory event returned by a crawler before enrichment.

    The first ten fields are required and must be supplied in declaration
    order when passed positionally. ``tags`` and ``raw_text`` are optional
    and default to an empty list and an empty string respectively.
    """

    source: str
    source_label: str
    standard_code: str
    title: str
    summary: str
    full_text_url: str
    status: str  # one of 'enacted' | 'draft' | 'consultation'
    published_at: str  # date as a YYYY-MM-DD string
    effective_at: str | None  # may be None; presumably same format as published_at -- TODO confirm
    category: str
    # default_factory gives every instance its own list rather than a shared one.
    tags: list[str] = field(default_factory=list)
    raw_text: str = ""  # full crawled text, used for hashing and as LLM input
|
||||
|
||||
|
||||
class BaseCrawler(ABC):
    """Abstract regulatory source crawler.

    Concrete crawlers subclass this and implement :meth:`fetch`; the class
    itself cannot be instantiated because ``fetch`` is abstract.
    """

    @abstractmethod
    def fetch(self, limit: int = 50) -> list[RawEvent]:
        """Fetch up to `limit` recent events from the data source.

        Args:
            limit: Maximum number of events to return. Defaults to 50.

        Returns:
            A list of ``RawEvent`` objects, at most ``limit`` long.

        Note:
            This base implementation has no body beyond this docstring, so
            calling it through ``super()`` returns ``None``.
        """
|
||||
Reference in New Issue
Block a user