Changelog:
- Fixed auto_handoff: added default markers (DECISION/TODO/BLOCKED/QUESTION)
- Fixed enhanced_search: internal imports use cortex. prefix
- Fixed intent_classifier test: stronger WHEN query for date_tokens test
- Fixed test imports: all use cortex.module_name
- Fixed triage test: accept MINIMAL classification for very low priority
- Fixed typo: self.self.assertIn → self.assertIn

425 lines · 15 KiB · Python
#!/usr/bin/env python3
|
|
"""Tests for feedback_loop.py and validate_output.py — 30+ tests."""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from cortex.feedback_loop import (
|
|
parse_since, parse_jsonl, get_text_content, get_tool_name,
|
|
detect_corrections, detect_retries, detect_tool_failures,
|
|
detect_self_errors, detect_knowledge_gaps, analyze_session,
|
|
finding_to_markdown, deduplicate_findings, load_config,
|
|
get_session_timestamp, Finding, append_to_growth_log,
|
|
)
|
|
from cortex.validate_output import (
|
|
extract_mentioned_files, extract_created_files_from_transcript,
|
|
extract_transcript_errors, check_test_results, validate_file_exists,
|
|
validate_python_syntax, validate_typescript_structure, run_validation,
|
|
ValidationReport, parse_jsonl as v_parse_jsonl,
|
|
)
|
|
|
|
DEFAULT_CONFIG = load_config()
|
|
|
|
|
|
def make_entry(role="user", text="hello", entry_type="message", **kwargs):
    """Build a minimal transcript entry with a single text content item.

    Extra keyword arguments are merged into the top-level entry dict.
    """
    message = {"role": role, "content": [{"type": "text", "text": text}]}
    entry = {"type": entry_type, "message": message}
    entry.update(kwargs)
    return entry
def make_tool_call(name="exec", args=None):
    """Build an assistant entry containing a single toolCall content item."""
    call = {"type": "toolCall", "name": name, "arguments": args or {}}
    return {"type": "message", "message": {"role": "assistant", "content": [call]}}
def make_tool_result(text="ok", is_error=False):
    """Build a toolResult entry with one text item and an isError flag."""
    body = [{"type": "text", "text": text}]
    return {"type": "message",
            "message": {"role": "toolResult", "content": body, "isError": is_error}}
def write_jsonl(path, entries):
    """Write *entries* to *path* as JSON Lines (one JSON object per line).

    Opens with an explicit UTF-8 encoding so output does not depend on the
    platform's default locale encoding.
    """
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(e) + "\n" for e in entries)
# === Feedback Loop Tests ===
|
|
|
|
class TestParseSince(unittest.TestCase):
    """parse_since: relative time specs ('24h', '7d', '30m') -> aware UTC cutoffs."""

    def _age_seconds(self, spec):
        # Seconds elapsed between "now" and the parsed cutoff datetime.
        return (datetime.now(timezone.utc) - parse_since(spec)).total_seconds()

    def test_hours(self):
        self.assertAlmostEqual(self._age_seconds("24h"), 86400, delta=5)

    def test_days(self):
        self.assertAlmostEqual(self._age_seconds("7d"), 604800, delta=5)

    def test_minutes(self):
        self.assertAlmostEqual(self._age_seconds("30m"), 1800, delta=5)

    def test_invalid(self):
        with self.assertRaises(ValueError):
            parse_since("abc")
class TestGetTextContent(unittest.TestCase):
    """get_text_content: extract plain text from list, string, and empty content."""

    def test_list_content(self):
        self.assertEqual(get_text_content(make_entry(text="hello world")), "hello world")

    def test_string_content(self):
        self.assertEqual(get_text_content({"message": {"content": "plain string"}}), "plain string")

    def test_empty(self):
        self.assertEqual(get_text_content({"message": {}}), "")
class TestGetToolName(unittest.TestCase):
    """get_tool_name: pull the tool name from a toolCall entry, else None."""

    def test_tool_call(self):
        self.assertEqual(get_tool_name(make_tool_call("read")), "read")

    def test_no_tool(self):
        self.assertIsNone(get_tool_name(make_entry()))
class TestDetectCorrections(unittest.TestCase):
    """detect_corrections: user pushback directly after an assistant turn."""

    def _scan(self, *entries):
        # Run the detector over the given conversation entries.
        return detect_corrections(list(entries), DEFAULT_CONFIG, "test.jsonl")

    def test_detects_nein(self):
        found = self._scan(
            make_entry("assistant", "Here is the result"),
            make_entry("user", "nein, das ist falsch"),
        )
        self.assertTrue(len(found) >= 1)
        self.assertEqual(found[0].category, "correction")

    def test_detects_wrong(self):
        found = self._scan(
            make_entry("assistant", "Done!"),
            make_entry("user", "That's wrong"),
        )
        self.assertTrue(len(found) >= 1)

    def test_no_false_positive(self):
        found = self._scan(
            make_entry("assistant", "Here you go"),
            make_entry("user", "Thanks, great job!"),
        )
        self.assertEqual(len(found), 0)

    def test_requires_assistant_before(self):
        # A bare user "nein" with no preceding assistant turn is not a correction.
        self.assertEqual(len(self._scan(make_entry("user", "nein"))), 0)
class TestDetectRetries(unittest.TestCase):
    """detect_retries: repeated calls to the same tool above a threshold."""

    def test_detects_3_retries(self):
        same_tool = [make_tool_call("exec") for _ in range(4)]
        found = detect_retries(same_tool, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(found), 1)
        self.assertIn("4 times", found[0].trigger)

    def test_no_retry_under_threshold(self):
        same_tool = [make_tool_call("exec") for _ in range(2)]
        self.assertEqual(len(detect_retries(same_tool, DEFAULT_CONFIG, "test.jsonl")), 0)

    def test_different_tools_no_retry(self):
        mixed = [make_tool_call(t) for t in ("exec", "read", "write")]
        self.assertEqual(len(detect_retries(mixed, DEFAULT_CONFIG, "test.jsonl")), 0)
class TestDetectToolFailures(unittest.TestCase):
    """detect_tool_failures: isError flag or error-looking result text."""

    @staticmethod
    def _result(text, is_error=False):
        # make_tool_result already sets role "toolResult"; re-set for clarity.
        entry = make_tool_result(text, is_error=is_error)
        entry["message"]["role"] = "toolResult"
        return entry

    def test_detects_is_error(self):
        found = detect_tool_failures(
            [self._result("something failed", is_error=True)], DEFAULT_CONFIG, "test.jsonl")
        self.assertTrue(len(found) >= 1)

    def test_detects_error_text(self):
        found = detect_tool_failures(
            [self._result("Error: command not found")], DEFAULT_CONFIG, "test.jsonl")
        self.assertTrue(len(found) >= 1)

    def test_no_error(self):
        found = detect_tool_failures(
            [self._result("All good")], DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(found), 0)
class TestDetectSelfErrors(unittest.TestCase):
    """detect_self_errors: assistant apologising for a mistake (EN and DE)."""

    def _count(self, text):
        # Number of findings for a single assistant message with this text.
        return len(detect_self_errors(
            [make_entry("assistant", text)], DEFAULT_CONFIG, "test.jsonl"))

    def test_detects_sorry(self):
        self.assertEqual(self._count("Sorry, I made a mistake there"), 1)

    def test_detects_entschuldigung(self):
        self.assertEqual(self._count("Entschuldigung, das war falsch"), 1)

    def test_no_apology(self):
        self.assertEqual(self._count("Here is your answer"), 0)
class TestDetectKnowledgeGaps(unittest.TestCase):
    """detect_knowledge_gaps: agent admits ignorance, user supplies the answer."""

    def test_detects_gap(self):
        convo = [
            make_entry("assistant", "I couldn't find that information"),
            make_entry("user", "The answer is actually in the config file at /etc/app/config.yaml"),
        ]
        self.assertEqual(len(detect_knowledge_gaps(convo, DEFAULT_CONFIG, "test.jsonl")), 1)

    def test_no_gap_short_reply(self):
        # A terse user reply does not count as supplying the missing knowledge.
        convo = [
            make_entry("assistant", "I couldn't find it"),
            make_entry("user", "ok"),
        ]
        self.assertEqual(len(detect_knowledge_gaps(convo, DEFAULT_CONFIG, "test.jsonl")), 0)
class TestFindingToMarkdown(unittest.TestCase):
    """finding_to_markdown: rendering of a Finding into a growth-log entry."""

    def test_format(self):
        finding = Finding("correction", "User said 'nein'", (10, 12), "test.jsonl")
        rendered = finding_to_markdown(finding, DEFAULT_CONFIG)
        for fragment in ("Auto-detected: Correction", "User said 'nein'", "lines 10-12"):
            self.assertIn(fragment, rendered)
class TestDeduplicate(unittest.TestCase):
    """deduplicate_findings: identical triggers collapse to a single finding."""

    def test_removes_dupes(self):
        trigger = "User said nein after agent response"
        dupes = [
            Finding("correction", trigger, (1, 2), "a.jsonl"),
            Finding("correction", trigger, (5, 6), "a.jsonl"),
        ]
        self.assertEqual(len(deduplicate_findings(dupes)), 1)
class TestParseJsonl(unittest.TestCase):
    """parse_jsonl: tolerant JSON Lines reader (skips unparseable lines)."""

    def _parse_text(self, text):
        # Write text to a temp .jsonl file, parse it, then clean up.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            f.write(text)
            f.flush()
            parsed = parse_jsonl(Path(f.name))
        os.unlink(f.name)
        return parsed

    def test_parses_valid(self):
        self.assertEqual(len(self._parse_text('{"a":1}\n{"b":2}\n')), 2)

    def test_skips_bad_lines(self):
        self.assertEqual(len(self._parse_text('{"a":1}\nBAD LINE\n{"b":2}\n')), 2)
# === Validator Tests ===
|
|
|
|
class TestExtractMentionedFiles(unittest.TestCase):
    """extract_mentioned_files: filenames pulled out of a task description."""

    def test_backtick_files(self):
        found = extract_mentioned_files("Create `feedback_loop.py` and `config.json`")
        self.assertIn("feedback_loop.py", found)
        self.assertIn("config.json", found)

    def test_bare_files(self):
        found = extract_mentioned_files("Write test_selfheal.py with 25 tests")
        self.assertIn("test_selfheal.py", found)
class TestExtractCreatedFiles(unittest.TestCase):
    """extract_created_files_from_transcript: paths from Write tool calls."""

    def test_write_calls(self):
        # make_tool_call builds the same assistant/toolCall entry shape.
        transcript = [make_tool_call("Write", {"path": "/tmp/test.py"})]
        self.assertIn("/tmp/test.py", extract_created_files_from_transcript(transcript))
class TestValidateFileExists(unittest.TestCase):
    """validate_file_exists: present, missing, and empty files."""

    def _temp_file(self, content, suffix=""):
        # Create a persisting temp file with the given content; caller unlinks.
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=suffix) as f:
            f.write(content)
            f.flush()
            return f.name

    def test_existing_file(self):
        path = self._temp_file("print('hi')", suffix=".py")
        ok, _ = validate_file_exists(path)
        self.assertTrue(ok)
        os.unlink(path)

    def test_missing_file(self):
        ok, _ = validate_file_exists("/tmp/nonexistent_xyz_abc.py")
        self.assertFalse(ok)

    def test_empty_file(self):
        path = self._temp_file("")
        ok, detail = validate_file_exists(path)
        self.assertFalse(ok)
        self.assertIn("empty", detail)
        os.unlink(path)
class TestValidatePythonSyntax(unittest.TestCase):
    """validate_python_syntax: syntax check on a .py file on disk."""

    def _check(self, source):
        # Write source to a temp .py file, validate, clean up, return (ok, detail).
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write(source)
            f.flush()
            outcome = validate_python_syntax(f.name)
        os.unlink(f.name)
        return outcome

    def test_valid_python(self):
        ok, _ = self._check("def hello():\n return 42\n")
        self.assertTrue(ok)

    def test_invalid_python(self):
        ok, detail = self._check("def hello(\n")
        self.assertFalse(ok)
        self.assertIn("Syntax error", detail)
class TestValidateTypescript(unittest.TestCase):
    """validate_typescript_structure: structural heuristics for .ts files."""

    def _check(self, source):
        # Write source to a temp .ts file, validate, clean up, return (ok, detail).
        with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f:
            f.write(source)
            f.flush()
            outcome = validate_typescript_structure(f.name)
        os.unlink(f.name)
        return outcome

    def test_valid_ts(self):
        ok, _ = self._check("export function hello(): string { return 'hi'; }\n")
        self.assertTrue(ok)

    def test_excessive_any(self):
        ok, detail = self._check("export " + "const x: any = 1;\n" * 15)
        self.assertFalse(ok)
        self.assertIn("any", detail)
class TestCheckTestResults(unittest.TestCase):
    """check_test_results: pytest summary lines found in tool output."""

    def _analyze(self, summary):
        entry = make_tool_result(summary)
        entry["message"]["role"] = "toolResult"
        return check_test_results([entry])

    def test_pytest_pass(self):
        outcome = self._analyze("10 passed in 0.5s")
        self.assertTrue(outcome["tests_found"])
        self.assertTrue(outcome["tests_passed"])

    def test_pytest_fail(self):
        outcome = self._analyze("3 failed, 7 passed")
        self.assertTrue(outcome["tests_found"])
        self.assertFalse(outcome["tests_passed"])
class TestValidationReport(unittest.TestCase):
    """ValidationReport: pass/fail accounting and JSON export."""

    def test_report_pass(self):
        report = ValidationReport()
        report.add("check1", "pass")
        self.assertTrue(report.ok)
        self.assertEqual(report.passed, 1)

    def test_report_fail(self):
        report = ValidationReport()
        report.add("check1", "fail", "broken")
        self.assertFalse(report.ok)

    def test_to_json(self):
        report = ValidationReport()
        report.add("check1", "pass")
        payload = report.to_json()
        self.assertIn("checks", payload)
        self.assertTrue(payload["ok"])
class TestIntegration(unittest.TestCase):
    """End-to-end runs against synthetic session transcripts on disk."""

    def test_full_feedback_loop(self):
        # One session exhibiting a correction, 4x retry, apology, and tool error.
        session = [
            {"type": "session", "id": "test-123", "timestamp": "2026-02-08T10:00:00Z"},
            make_entry("assistant", "Here is the file"),
            make_entry("user", "nein, das ist falsch"),
        ]
        session.extend(make_tool_call("exec") for _ in range(4))
        session.append(make_entry("assistant", "Sorry, I made an error"))
        session.append(make_tool_result("Error: permission denied", is_error=True))

        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, session)
            findings = analyze_session(parse_jsonl(Path(f.name)), DEFAULT_CONFIG, f.name)
            os.unlink(f.name)

        seen = {finding.category for finding in findings}
        for expected in ("correction", "retry", "self_error", "tool_failure"):
            self.assertIn(expected, seen)

    def test_full_validation(self):
        # A real Python file on disk so file-existence checks can succeed.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as pyf:
            pyf.write("print('hello')\n")
            pyf.flush()
            py_path = pyf.name

        transcript = [
            {"type": "session", "id": "val-test", "timestamp": "2026-02-08T10:00:00Z"},
            make_tool_call("Write", {"path": py_path}),
            make_tool_result("25 passed in 1.2s"),
        ]
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, transcript)
            report = run_validation(Path(f.name), f"Create {py_path} with tests")
            os.unlink(f.name)
        os.unlink(py_path)

        self.assertIsInstance(report, ValidationReport)
        self.assertTrue(report.passed > 0)
class TestAppendToGrowthLog(unittest.TestCase):
    """append_to_growth_log: dry-run rendering without touching the log file."""

    def test_dry_run(self):
        finding = Finding("correction", "test trigger", (1, 2), "test.jsonl")
        rendered = append_to_growth_log([finding], DEFAULT_CONFIG, dry_run=True)
        self.assertIn("Auto-detected", rendered)

    def test_empty_findings(self):
        self.assertEqual(append_to_growth_log([], DEFAULT_CONFIG, dry_run=True), "")
if __name__ == "__main__":
    # Discover and run every TestCase in this module when executed directly.
    unittest.main()