48 lines
1.6 KiB
Python
48 lines
1.6 KiB
Python
|
|
"""Self-contained text helpers for the web layer.

These intentionally avoid importing from rag_eval so the web server has no
import-time dependency on the evaluation engine (and therefore boots even when
ragas is unavailable). The contexts parser mirrors rag_eval.shared.utils so the
console interprets serialized CSV context columns the same way the engine does.
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import ast
|
||
|
|
import json
|
||
|
|
import math
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
|
||
|
|
def parse_contexts(value: Any) -> list[str]:
    """Normalize a context payload into a list of non-empty strings.

    Accepts native lists, JSON/Python-literal serialized lists (as written into
    scores.csv), and plain text, mirroring the engine's own parsing rules.

    Args:
        value: A list, a string holding a serialized list or free text,
            ``None``/NaN (treated as missing), or any other object, which is
            stringified before parsing.

    Returns:
        The stripped, non-empty context strings. Missing or blank input yields
        an empty list; text that is not a serialized list is split on blank
        lines when it contains any, otherwise returned as a single item.
    """
    if isinstance(value, list):
        return _clean_items(value)
    # A float NaN is treated the same as a missing value.
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return []

    text = str(value).strip()
    if not text:
        return []

    # Accept serialized lists from CSV exports before falling back to plain text.
    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # json.JSONDecodeError is a ValueError. ast.literal_eval may also
            # raise TypeError (e.g. "{[1]: 2}" uses an unhashable key),
            # MemoryError, or RecursionError on malformed input; none of these
            # should abort parsing, the text is simply not a serialized list.
            continue
        # Only lists count as serialized contexts; any other parsed value
        # (dict, tuple, number, ...) falls through to the plain-text handling.
        if isinstance(parsed, list):
            return _clean_items(parsed)

    # Preserve paragraph-style context dumps by splitting on blank lines first.
    if "\n\n" in text:
        chunks = [chunk.strip() for chunk in text.split("\n\n") if chunk.strip()]
        if chunks:
            return chunks

    return [text]


def _clean_items(items: list[Any]) -> list[str]:
    """Stringify and strip each item, dropping those that end up empty."""
    stripped = (str(item).strip() for item in items)
    return [entry for entry in stripped if entry]
|