feat(advisor): add 0.85 advisory threshold triggering LLM suggestions
- Add advisory_threshold=0.85 field to MetricRule (higher-is-better metrics)
- diagnose() now emits severity='low' for scores in (warning_threshold, 0.85)
- noise_sensitivity (lower-is-better) keeps its existing two-tier thresholds
- writer.py: severity labels mapped to Chinese (严重/警告/待优化)
- llm_analyzer.py: prompt explains low/warning/critical tiers in Chinese
- Tests: 5 new cases for 'low' severity, updated log summary assertions

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -10,10 +10,38 @@ class TestDiagnosis(unittest.TestCase):
|
||||
for i, s in enumerate(scores)]
|
||||
|
||||
def test_no_diagnosis_when_all_scores_above_threshold(self):
    """Boundary case: a faithfulness mean of exactly 0.85 yields no diagnosis.

    Only a mean strictly below 0.85 is expected to trigger a diagnosis,
    so scores averaging 0.85 must leave the result list empty.
    """
    boundary_scores = [0.8, 0.9, 0.85]
    findings = diagnose(
        self._make_rows("faithfulness", boundary_scores),
        metrics=["faithfulness"],
    )
    self.assertEqual(findings, [])
|
||||
|
||||
def test_no_diagnosis_when_mean_above_advisory_threshold(self):
    """answer_relevancy scores averaging 0.90 produce an empty diagnosis list."""
    healthy_scores = [0.9, 0.92, 0.88]
    findings = diagnose(
        self._make_rows("answer_relevancy", healthy_scores),
        metrics=["answer_relevancy"],
    )
    self.assertEqual(findings, [])
|
||||
|
||||
def test_low_severity_when_mean_below_advisory_threshold(self):
    """A faithfulness mean of 0.80 is reported once, as a "low" finding.

    The mean lies between the warning threshold (0.7) and the advisory
    threshold (0.85); the finding's threshold must be approximately 0.85.
    """
    advisory_band_scores = [0.78, 0.80, 0.82]
    findings = diagnose(
        self._make_rows("faithfulness", advisory_band_scores),
        metrics=["faithfulness"],
    )
    self.assertEqual(len(findings), 1)
    finding = findings[0]
    self.assertEqual(finding.severity, "low")
    self.assertAlmostEqual(finding.threshold, 0.85, places=2)
|
||||
|
||||
def test_low_severity_answer_relevancy_at_0_84(self):
    """Uniform answer_relevancy scores of 0.84 give exactly one "low" finding."""
    just_below_advisory = [0.84, 0.84, 0.84]
    findings = diagnose(
        self._make_rows("answer_relevancy", just_below_advisory),
        metrics=["answer_relevancy"],
    )
    self.assertEqual(len(findings), 1)
    finding = findings[0]
    self.assertEqual(finding.severity, "low")
|
||||
|
||||
def test_low_severity_has_root_causes_and_actions(self):
    """A "low" context_precision finding carries root causes and suggested actions."""
    advisory_band_scores = [0.75, 0.76, 0.77]
    findings = diagnose(
        self._make_rows("context_precision", advisory_band_scores),
        metrics=["context_precision"],
    )
    self.assertEqual(len(findings), 1)
    finding = findings[0]
    self.assertEqual(finding.severity, "low")
    # Both explanatory collections must be non-empty; check them in the same
    # order as before so the first failing field is the one reported.
    for field_name in ("root_causes", "suggested_actions"):
        self.assertTrue(len(getattr(finding, field_name)) > 0)
|
||||
|
||||
def test_warning_when_mean_below_warning_threshold(self):
|
||||
rows = self._make_rows("faithfulness", [0.65, 0.62, 0.68])
|
||||
result = diagnose(rows, metrics=["faithfulness"])
|
||||
|
||||
Reference in New Issue
Block a user