fix somethings
This commit is contained in:
@@ -4,10 +4,12 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from app.shared.bootstrap import get_perception_service
|
||||
from app.shared.bootstrap import get_crawl_service, get_event_store, get_perception_service
|
||||
from app.api.dependencies.auth import get_current_user
|
||||
from app.domain.auth.models import UserClaims
|
||||
from app.shared.async_utils import iter_in_thread
|
||||
|
||||
router = APIRouter(prefix="/perception", tags=["智能感知"])
|
||||
@@ -65,3 +67,77 @@ async def analyze_event(event_id: str):
|
||||
"X-Accel-Buffering": "no",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@router.post("/crawl")
|
||||
async def run_crawl(
|
||||
body: dict = None,
|
||||
current_user: UserClaims = Depends(get_current_user),
|
||||
):
|
||||
"""Trigger manual crawl of regulatory sources. Streams SSE progress.
|
||||
|
||||
Body (optional): {"sources": ["CATARC", "国标委·强制性", "EUR-Lex"]}
|
||||
Omit sources to crawl all registered sources.
|
||||
"""
|
||||
sources: list[str] | None = (body or {}).get("sources")
|
||||
crawl_svc = get_crawl_service()
|
||||
|
||||
async def crawl_stream():
|
||||
async for item in iter_in_thread(crawl_svc.run_crawl(sources=sources)):
|
||||
event_name = item.get("event", "message")
|
||||
data = item.get("data", "")
|
||||
if isinstance(data, (dict, list)):
|
||||
data = json.dumps(data, ensure_ascii=False)
|
||||
yield f"event: {event_name}\ndata: {data}\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
crawl_stream(),
|
||||
media_type="text/event-stream",
|
||||
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
|
||||
)
|
||||
|
||||
|
||||
@router.post("/events/{event_id}/process")
|
||||
async def process_event(
|
||||
event_id: str,
|
||||
current_user: UserClaims = Depends(get_current_user),
|
||||
):
|
||||
"""Trigger LLM pipeline (extract + assess + diff) for a single event."""
|
||||
from datetime import UTC, datetime
|
||||
from app.infrastructure.perception.llm_pipeline import LlmPipeline
|
||||
from app.shared.bootstrap import get_retrieval_service
|
||||
|
||||
event = get_perception_service().get_event(event_id)
|
||||
if not event:
|
||||
from fastapi import HTTPException
|
||||
raise HTTPException(status_code=404, detail=f"Event {event_id} not found")
|
||||
|
||||
store = get_event_store()
|
||||
pipeline = LlmPipeline()
|
||||
|
||||
structure = pipeline.extract_structure(event)
|
||||
event.update(structure)
|
||||
event["affected_docs"] = pipeline.assess_impact(event, get_retrieval_service())
|
||||
event["processed_at"] = datetime.now(UTC).isoformat()
|
||||
store.upsert(event)
|
||||
|
||||
return {"status": "ok", "event_id": event_id, "processed_at": event["processed_at"]}
|
||||
|
||||
|
||||
@router.get("/events/{event_id}/diff")
|
||||
async def get_event_diff(event_id: str):
|
||||
"""Return semantic diff detail for an event (only available if previously crawled twice)."""
|
||||
event = get_perception_service().get_event(event_id)
|
||||
if not event:
|
||||
from fastapi import HTTPException
|
||||
raise HTTPException(status_code=404, detail=f"Event {event_id} not found")
|
||||
if not event.get("change_summary"):
|
||||
from fastapi import HTTPException
|
||||
raise HTTPException(status_code=404, detail="No diff available for this event")
|
||||
return {
|
||||
"event_id": event_id,
|
||||
"change_summary": event.get("change_summary"),
|
||||
"changed_sections": event.get("changed_sections") or [],
|
||||
"previous_hash": event.get("previous_hash"),
|
||||
"content_hash": event.get("content_hash"),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user