# darkplex-core/tests/test_selfheal.py
# Claudia 43d033e242 feat: initial cortex package — 8 intelligence modules, CLI, Docker
# Modules: triage, health_scanner, feedback_loop, memory_hygiene,
#          roadmap, validate_output, enhanced_search, auto_handoff
#          + composite_scorer, intent_classifier
#
# CLI: 'cortex <module> <command>' unified entry point
# Tests: 157/169 passing (12 assertion mismatches from rename)
# Docker: python:3.11-slim based
# 2026-02-09 11:18:20 +01:00
#
# 425 lines
# 15 KiB
# Python
#!/usr/bin/env python3
"""Tests for feedback_loop.py and validate_output.py — 30+ tests."""
import json
import os
import sys
import tempfile
import unittest
from pathlib import Path
from datetime import datetime, timedelta, timezone
sys.path.insert(0, str(Path(__file__).parent))
from cortex.feedback_loop import (
parse_since, parse_jsonl, get_text_content, get_tool_name,
detect_corrections, detect_retries, detect_tool_failures,
detect_self_errors, detect_knowledge_gaps, analyze_session,
finding_to_markdown, deduplicate_findings, load_config,
get_session_timestamp, Finding, append_to_growth_log,
)
from cortex.validate_output import (
extract_mentioned_files, extract_created_files_from_transcript,
extract_transcript_errors, check_test_results, validate_file_exists,
validate_python_syntax, validate_typescript_structure, run_validation,
ValidationReport, parse_jsonl as v_parse_jsonl,
)
# Shared default configuration so every detector test exercises the stock thresholds.
DEFAULT_CONFIG = load_config()
def make_entry(role="user", text="hello", entry_type="message", **kwargs):
    """Build a minimal transcript entry with one text content item.

    Extra keyword arguments are merged into the top-level dict (e.g. a
    ``timestamp`` key).
    """
    entry = {
        "type": entry_type,
        "message": {"role": role, "content": [{"type": "text", "text": text}]},
    }
    entry.update(kwargs)
    return entry
def make_tool_call(name="exec", args=None):
    """Build an assistant message containing a single toolCall content item."""
    call_item = {"type": "toolCall", "name": name, "arguments": args or {}}
    return {
        "type": "message",
        "message": {"role": "assistant", "content": [call_item]},
    }
def make_tool_result(text="ok", is_error=False):
    """Build a toolResult message whose content is a single text item."""
    message = {
        "role": "toolResult",
        "content": [{"type": "text", "text": text}],
        "isError": is_error,
    }
    return {"type": "message", "message": message}
def write_jsonl(path, entries):
    """Serialize *entries* as JSONL (one JSON object per line), overwriting *path*."""
    with open(path, "w") as handle:
        handle.writelines(json.dumps(entry) + "\n" for entry in entries)
# === Feedback Loop Tests ===
class TestParseSince(unittest.TestCase):
    """parse_since turns relative-duration strings ("24h", "7d") into datetimes."""

    def _age_seconds(self, spec):
        # Seconds between "now" and the cutoff parse_since produced.
        return (datetime.now(timezone.utc) - parse_since(spec)).total_seconds()

    def test_hours(self):
        self.assertAlmostEqual(self._age_seconds("24h"), 86400, delta=5)

    def test_days(self):
        self.assertAlmostEqual(self._age_seconds("7d"), 604800, delta=5)

    def test_minutes(self):
        self.assertAlmostEqual(self._age_seconds("30m"), 1800, delta=5)

    def test_invalid(self):
        self.assertRaises(ValueError, parse_since, "abc")
class TestGetTextContent(unittest.TestCase):
    """get_text_content flattens an entry's message content into plain text."""

    def test_list_content(self):
        # Normal shape: a list of typed content items.
        self.assertEqual(get_text_content(make_entry(text="hello world")), "hello world")

    def test_string_content(self):
        # Content may also be a bare string.
        self.assertEqual(get_text_content({"message": {"content": "plain string"}}), "plain string")

    def test_empty(self):
        # Missing content yields an empty string rather than an error.
        self.assertEqual(get_text_content({"message": {}}), "")
class TestGetToolName(unittest.TestCase):
    """get_tool_name extracts the tool name from a toolCall entry."""

    def test_tool_call(self):
        self.assertEqual(get_tool_name(make_tool_call("read")), "read")

    def test_no_tool(self):
        # Plain text entries carry no tool call.
        self.assertIsNone(get_tool_name(make_entry()))
class TestDetectCorrections(unittest.TestCase):
    """detect_corrections flags user pushback that follows an assistant turn."""

    def _run(self, entries):
        return detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_nein(self):
        # German negation right after an assistant message.
        findings = self._run([
            make_entry("assistant", "Here is the result"),
            make_entry("user", "nein, das ist falsch"),
        ])
        self.assertGreaterEqual(len(findings), 1)
        self.assertEqual(findings[0].category, "correction")

    def test_detects_wrong(self):
        findings = self._run([
            make_entry("assistant", "Done!"),
            make_entry("user", "That's wrong"),
        ])
        self.assertGreaterEqual(len(findings), 1)

    def test_no_false_positive(self):
        # Praise must not be counted as a correction.
        findings = self._run([
            make_entry("assistant", "Here you go"),
            make_entry("user", "Thanks, great job!"),
        ])
        self.assertEqual(len(findings), 0)

    def test_requires_assistant_before(self):
        # A lone user "nein" with no preceding assistant turn is not a correction.
        self.assertEqual(len(self._run([make_entry("user", "nein")])), 0)
class TestDetectRetries(unittest.TestCase):
    """detect_retries fires when the same tool is invoked repeatedly in a row."""

    def _run(self, entries):
        return detect_retries(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_3_retries(self):
        # Four identical calls is above the retry threshold.
        findings = self._run([make_tool_call("exec") for _ in range(4)])
        self.assertEqual(len(findings), 1)
        self.assertIn("4 times", findings[0].trigger)

    def test_no_retry_under_threshold(self):
        findings = self._run([make_tool_call("exec") for _ in range(2)])
        self.assertEqual(len(findings), 0)

    def test_different_tools_no_retry(self):
        # Distinct tools in sequence are not a retry pattern.
        calls = [make_tool_call(tool) for tool in ("exec", "read", "write")]
        self.assertEqual(len(self._run(calls)), 0)
class TestDetectToolFailures(unittest.TestCase):
    """detect_tool_failures flags tool results that report errors.

    Note: make_tool_result already sets ``role="toolResult"``, so the
    post-hoc role reassignment the tests previously did (with a stale
    "toolResult role needed" comment) was redundant and has been removed.
    """

    def _run(self, entries):
        return detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_is_error(self):
        # Explicit isError flag on the tool result.
        findings = self._run([make_tool_result("something failed", is_error=True)])
        self.assertGreaterEqual(len(findings), 1)

    def test_detects_error_text(self):
        # Error-looking text without the isError flag.
        findings = self._run([make_tool_result("Error: command not found")])
        self.assertGreaterEqual(len(findings), 1)

    def test_no_error(self):
        findings = self._run([make_tool_result("All good")])
        self.assertEqual(len(findings), 0)
class TestDetectSelfErrors(unittest.TestCase):
    """detect_self_errors flags assistant apologies / admissions of mistakes."""

    def _run(self, assistant_text):
        entries = [make_entry("assistant", assistant_text)]
        return detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_sorry(self):
        self.assertEqual(len(self._run("Sorry, I made a mistake there")), 1)

    def test_detects_entschuldigung(self):
        # German apology is detected too.
        self.assertEqual(len(self._run("Entschuldigung, das war falsch")), 1)

    def test_no_apology(self):
        self.assertEqual(len(self._run("Here is your answer")), 0)
class TestDetectKnowledgeGaps(unittest.TestCase):
    """detect_knowledge_gaps pairs an assistant 'couldn't find' with a substantive user reply."""

    def _run(self, entries):
        return detect_knowledge_gaps(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_gap(self):
        conversation = [
            make_entry("assistant", "I couldn't find that information"),
            make_entry("user", "The answer is actually in the config file at /etc/app/config.yaml"),
        ]
        self.assertEqual(len(self._run(conversation)), 1)

    def test_no_gap_short_reply(self):
        # A terse follow-up does not supply the missing knowledge.
        conversation = [
            make_entry("assistant", "I couldn't find it"),
            make_entry("user", "ok"),
        ]
        self.assertEqual(len(self._run(conversation)), 0)
class TestFindingToMarkdown(unittest.TestCase):
    """finding_to_markdown renders a Finding as a growth-log markdown entry."""

    def test_format(self):
        finding = Finding("correction", "User said 'nein'", (10, 12), "test.jsonl")
        rendered = finding_to_markdown(finding, DEFAULT_CONFIG)
        for fragment in ("Auto-detected: User Correction", "User said 'nein'", "lines 10-12"):
            self.assertIn(fragment, rendered)
class TestDeduplicate(unittest.TestCase):
    """deduplicate_findings collapses findings that share the same trigger text."""

    def test_removes_dupes(self):
        trigger = "User said nein after agent response"
        dupes = [
            Finding("correction", trigger, (1, 2), "a.jsonl"),
            Finding("correction", trigger, (5, 6), "a.jsonl"),
        ]
        self.assertEqual(len(deduplicate_findings(dupes)), 1)
class TestParseJsonl(unittest.TestCase):
    """parse_jsonl reads one JSON object per line and skips unparsable lines."""

    def _parse_text(self, text):
        # Round-trip helper: write raw text to a temp .jsonl, parse, clean up.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            f.write(text)
            f.flush()
        try:
            return parse_jsonl(Path(f.name))
        finally:
            os.unlink(f.name)

    def test_parses_valid(self):
        self.assertEqual(len(self._parse_text('{"a":1}\n{"b":2}\n')), 2)

    def test_skips_bad_lines(self):
        # The malformed middle line is silently dropped.
        self.assertEqual(len(self._parse_text('{"a":1}\nBAD LINE\n{"b":2}\n')), 2)
# === Validator Tests ===
class TestExtractMentionedFiles(unittest.TestCase):
    """extract_mentioned_files pulls filenames out of a task description."""

    def test_backtick_files(self):
        found = extract_mentioned_files("Create `feedback_loop.py` and `config.json`")
        self.assertIn("feedback_loop.py", found)
        self.assertIn("config.json", found)

    def test_bare_files(self):
        # Filenames are picked up even without backticks.
        found = extract_mentioned_files("Write test_selfheal.py with 25 tests")
        self.assertIn("test_selfheal.py", found)
class TestExtractCreatedFiles(unittest.TestCase):
    """extract_created_files_from_transcript collects paths from Write tool calls."""

    def test_write_calls(self):
        # make_tool_call builds the same entry shape the original spelled out inline.
        entries = [make_tool_call("Write", {"path": "/tmp/test.py"})]
        self.assertIn("/tmp/test.py", extract_created_files_from_transcript(entries))
class TestValidateFileExists(unittest.TestCase):
    """validate_file_exists checks that a path exists and is non-empty."""

    def _validate_contents(self, contents, suffix=""):
        # Materialize contents in a temp file, validate it, then clean up.
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=suffix) as f:
            f.write(contents)
            f.flush()
        try:
            return validate_file_exists(f.name)
        finally:
            os.unlink(f.name)

    def test_existing_file(self):
        ok, _ = self._validate_contents("print('hi')", suffix=".py")
        self.assertTrue(ok)

    def test_missing_file(self):
        ok, _ = validate_file_exists("/tmp/nonexistent_xyz_abc.py")
        self.assertFalse(ok)

    def test_empty_file(self):
        ok, detail = self._validate_contents("")
        self.assertFalse(ok)
        self.assertIn("empty", detail)
class TestValidatePythonSyntax(unittest.TestCase):
    """validate_python_syntax compiles a .py file and reports syntax errors."""

    def _validate_code(self, code):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write(code)
            f.flush()
        try:
            return validate_python_syntax(f.name)
        finally:
            os.unlink(f.name)

    def test_valid_python(self):
        ok, _ = self._validate_code("def hello():\n return 42\n")
        self.assertTrue(ok)

    def test_invalid_python(self):
        # Unclosed parenthesis → syntax error reported in the detail string.
        ok, detail = self._validate_code("def hello(\n")
        self.assertFalse(ok)
        self.assertIn("Syntax error", detail)
class TestValidateTypescript(unittest.TestCase):
    """validate_typescript_structure applies heuristic checks to .ts files."""

    def _validate_code(self, code):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f:
            f.write(code)
            f.flush()
        try:
            return validate_typescript_structure(f.name)
        finally:
            os.unlink(f.name)

    def test_valid_ts(self):
        ok, _ = self._validate_code("export function hello(): string { return 'hi'; }\n")
        self.assertTrue(ok)

    def test_excessive_any(self):
        # Fifteen `any` annotations should trip the excessive-any heuristic.
        ok, detail = self._validate_code("export " + "const x: any = 1;\n" * 15)
        self.assertFalse(ok)
        self.assertIn("any", detail)
class TestCheckTestResults(unittest.TestCase):
    """check_test_results parses pytest summary lines out of tool results.

    Note: make_tool_result already sets ``role="toolResult"``, so the
    redundant post-hoc role reassignment the original tests performed
    has been removed.
    """

    def test_pytest_pass(self):
        result = check_test_results([make_tool_result("10 passed in 0.5s")])
        self.assertTrue(result["tests_found"])
        self.assertTrue(result["tests_passed"])

    def test_pytest_fail(self):
        # Any failed count marks the run as not passed.
        result = check_test_results([make_tool_result("3 failed, 7 passed")])
        self.assertTrue(result["tests_found"])
        self.assertFalse(result["tests_passed"])
class TestValidationReport(unittest.TestCase):
    """ValidationReport aggregates named pass/fail checks."""

    def test_report_pass(self):
        report = ValidationReport()
        report.add("check1", "pass")
        self.assertTrue(report.ok)
        self.assertEqual(report.passed, 1)

    def test_report_fail(self):
        report = ValidationReport()
        report.add("check1", "fail", "broken")
        self.assertFalse(report.ok)

    def test_to_json(self):
        report = ValidationReport()
        report.add("check1", "pass")
        payload = report.to_json()
        self.assertIn("checks", payload)
        self.assertTrue(payload["ok"])
class TestIntegration(unittest.TestCase):
    """End-to-end runs of analyze_session / run_validation on a synthetic transcript."""

    def test_full_feedback_loop(self):
        # Transcript containing one of each detectable pattern.
        transcript = [
            {"type": "session", "id": "test-123", "timestamp": "2026-02-08T10:00:00Z"},
            make_entry("assistant", "Here is the file"),
            make_entry("user", "nein, das ist falsch"),
        ]
        transcript += [make_tool_call("exec") for _ in range(4)]
        transcript += [
            make_entry("assistant", "Sorry, I made an error"),
            make_tool_result("Error: permission denied", is_error=True),
        ]
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, transcript)
            findings = analyze_session(parse_jsonl(Path(f.name)), DEFAULT_CONFIG, f.name)
        os.unlink(f.name)
        categories = {finding.category for finding in findings}
        for expected in ("correction", "retry", "self_error", "tool_failure"):
            self.assertIn(expected, categories)

    def test_full_validation(self):
        # A real on-disk .py file so the existence/syntax checks can pass.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as pyf:
            pyf.write("print('hello')\n")
            pyf.flush()
        py_path = pyf.name
        transcript = [
            {"type": "session", "id": "val-test", "timestamp": "2026-02-08T10:00:00Z"},
            make_tool_call("Write", {"path": py_path}),
            make_tool_result("25 passed in 1.2s"),
        ]
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, transcript)
            report = run_validation(Path(f.name), f"Create {py_path} with tests")
        os.unlink(f.name)
        os.unlink(py_path)
        self.assertIsInstance(report, ValidationReport)
        self.assertGreater(report.passed, 0)
class TestAppendToGrowthLog(unittest.TestCase):
    """append_to_growth_log renders findings; dry_run returns the text instead of writing."""

    def test_dry_run(self):
        finding = Finding("correction", "test trigger", (1, 2), "test.jsonl")
        rendered = append_to_growth_log([finding], DEFAULT_CONFIG, dry_run=True)
        self.assertIn("Auto-detected", rendered)

    def test_empty_findings(self):
        # No findings → nothing to render.
        self.assertEqual(append_to_growth_log([], DEFAULT_CONFIG, dry_run=True), "")
if __name__ == "__main__":
    # Allow running this module directly: `python test_selfheal.py`.
    unittest.main()