#!/usr/bin/env python3
"""Tests for feedback_loop.py and validate_output.py — 30+ tests.

Covers the feedback-loop detectors (corrections, retries, tool failures,
self-errors, knowledge gaps), markdown/dedup/config utilities, and the
output validators (file existence, Python/TypeScript checks, test-result
parsing, full validation run).
"""
import json
import os
import sys
import tempfile
import unittest
from pathlib import Path
from datetime import datetime, timedelta, timezone

# Make the package importable when running this file directly.
sys.path.insert(0, str(Path(__file__).parent))

from cortex.feedback_loop import (
    parse_since,
    parse_jsonl,
    get_text_content,
    get_tool_name,
    detect_corrections,
    detect_retries,
    detect_tool_failures,
    detect_self_errors,
    detect_knowledge_gaps,
    analyze_session,
    finding_to_markdown,
    deduplicate_findings,
    load_config,
    get_session_timestamp,
    Finding,
    append_to_growth_log,
)
from cortex.validate_output import (
    extract_mentioned_files,
    extract_created_files_from_transcript,
    extract_transcript_errors,
    check_test_results,
    validate_file_exists,
    validate_python_syntax,
    validate_typescript_structure,
    run_validation,
    ValidationReport,
    parse_jsonl as v_parse_jsonl,
)

# Shared config used by every detector test.
DEFAULT_CONFIG = load_config()


def make_entry(role="user", text="hello", entry_type="message", **kwargs):
    """Helper to create a transcript entry."""
    e = {"type": entry_type, "message": {"role": role, "content": [{"type": "text", "text": text}]}}
    e.update(kwargs)
    return e


def make_tool_call(name="exec", args=None):
    """Helper to create an assistant tool-call entry."""
    return {"type": "message", "message": {"role": "assistant", "content": [
        {"type": "toolCall", "name": name, "arguments": args or {}}
    ]}}


def make_tool_result(text="ok", is_error=False):
    """Helper to create a toolResult entry, optionally flagged as an error."""
    return {"type": "message", "message": {"role": "toolResult", "content": [
        {"type": "text", "text": text}
    ], "isError": is_error}}


def write_jsonl(path, entries):
    """Write entries to *path* as one JSON object per line."""
    with open(path, "w") as f:
        for e in entries:
            f.write(json.dumps(e) + "\n")


# === Feedback Loop Tests ===


class TestParseSince(unittest.TestCase):
    def test_hours(self):
        result = parse_since("24h")
        self.assertAlmostEqual(
            (datetime.now(timezone.utc) - result).total_seconds(), 86400, delta=5)

    def test_days(self):
        result = parse_since("7d")
        self.assertAlmostEqual(
            (datetime.now(timezone.utc) - result).total_seconds(), 604800, delta=5)

    def test_minutes(self):
        result = parse_since("30m")
        self.assertAlmostEqual(
            (datetime.now(timezone.utc) - result).total_seconds(), 1800, delta=5)

    def test_invalid(self):
        with self.assertRaises(ValueError):
            parse_since("abc")


class TestGetTextContent(unittest.TestCase):
    def test_list_content(self):
        entry = make_entry(text="hello world")
        self.assertEqual(get_text_content(entry), "hello world")

    def test_string_content(self):
        entry = {"message": {"content": "plain string"}}
        self.assertEqual(get_text_content(entry), "plain string")

    def test_empty(self):
        entry = {"message": {}}
        self.assertEqual(get_text_content(entry), "")


class TestGetToolName(unittest.TestCase):
    def test_tool_call(self):
        entry = make_tool_call("read")
        self.assertEqual(get_tool_name(entry), "read")

    def test_no_tool(self):
        entry = make_entry()
        self.assertIsNone(get_tool_name(entry))


class TestDetectCorrections(unittest.TestCase):
    def test_detects_nein(self):
        entries = [
            make_entry("assistant", "Here is the result"),
            make_entry("user", "nein, das ist falsch"),
        ]
        findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertTrue(len(findings) >= 1)
        self.assertEqual(findings[0].category, "correction")

    def test_detects_wrong(self):
        entries = [
            make_entry("assistant", "Done!"),
            make_entry("user", "That's wrong"),
        ]
        findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertTrue(len(findings) >= 1)

    def test_no_false_positive(self):
        entries = [
            make_entry("assistant", "Here you go"),
            make_entry("user", "Thanks, great job!"),
        ]
        findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 0)

    def test_requires_assistant_before(self):
        entries = [
            make_entry("user", "nein"),
        ]
        findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 0)


class TestDetectRetries(unittest.TestCase):
    def test_detects_3_retries(self):
        entries = [make_tool_call("exec") for _ in range(4)]
        findings = detect_retries(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 1)
        self.assertIn("4 times", findings[0].trigger)

    def test_no_retry_under_threshold(self):
        entries = [make_tool_call("exec") for _ in range(2)]
        findings = detect_retries(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 0)

    def test_different_tools_no_retry(self):
        entries = [make_tool_call("exec"), make_tool_call("read"), make_tool_call("write")]
        findings = detect_retries(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 0)


class TestDetectToolFailures(unittest.TestCase):
    def test_detects_is_error(self):
        entries = [make_tool_result("something failed", is_error=True)]
        findings = detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertTrue(len(findings) >= 1)

    def test_detects_error_text(self):
        entries = [make_tool_result("Error: command not found")]
        # toolResult role needed
        entries[0]["message"]["role"] = "toolResult"
        findings = detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertTrue(len(findings) >= 1)

    def test_no_error(self):
        entries = [make_tool_result("All good")]
        entries[0]["message"]["role"] = "toolResult"
        findings = detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 0)


class TestDetectSelfErrors(unittest.TestCase):
    def test_detects_sorry(self):
        entries = [make_entry("assistant", "Sorry, I made a mistake there")]
        findings = detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 1)

    def test_detects_entschuldigung(self):
        entries = [make_entry("assistant", "Entschuldigung, das war falsch")]
        findings = detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 1)

    def test_no_apology(self):
        entries = [make_entry("assistant", "Here is your answer")]
        findings = detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 0)


class TestDetectKnowledgeGaps(unittest.TestCase):
    def test_detects_gap(self):
        entries = [
            make_entry("assistant", "I couldn't find that information"),
            make_entry("user", "The answer is actually in the config file at /etc/app/config.yaml"),
        ]
        findings = detect_knowledge_gaps(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 1)

    def test_no_gap_short_reply(self):
        entries = [
            make_entry("assistant", "I couldn't find it"),
            make_entry("user", "ok"),
        ]
        findings = detect_knowledge_gaps(entries, DEFAULT_CONFIG, "test.jsonl")
        self.assertEqual(len(findings), 0)


class TestFindingToMarkdown(unittest.TestCase):
    def test_format(self):
        f = Finding("correction", "User said 'nein'", (10, 12), "test.jsonl")
        md = finding_to_markdown(f, DEFAULT_CONFIG)
        self.assertIn("Auto-detected: Correction", md)
        self.assertIn("User said 'nein'", md)
        self.assertIn("lines 10-12", md)


class TestDeduplicate(unittest.TestCase):
    def test_removes_dupes(self):
        f1 = Finding("correction", "User said nein after agent response", (1, 2), "a.jsonl")
        f2 = Finding("correction", "User said nein after agent response", (5, 6), "a.jsonl")
        result = deduplicate_findings([f1, f2])
        self.assertEqual(len(result), 1)


class TestParseJsonl(unittest.TestCase):
    def test_parses_valid(self):
        # NOTE(review): re-reading f.name while the NamedTemporaryFile is
        # still open is POSIX-only behavior — fails on Windows. Works here.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            f.write('{"a":1}\n{"b":2}\n')
            f.flush()
            result = parse_jsonl(Path(f.name))
            self.assertEqual(len(result), 2)
        os.unlink(f.name)

    def test_skips_bad_lines(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            f.write('{"a":1}\nBAD LINE\n{"b":2}\n')
            f.flush()
            result = parse_jsonl(Path(f.name))
            self.assertEqual(len(result), 2)
        os.unlink(f.name)


# === Validator Tests ===


class TestExtractMentionedFiles(unittest.TestCase):
    def test_backtick_files(self):
        task = "Create `feedback_loop.py` and `config.json`"
        files = extract_mentioned_files(task)
        self.assertIn("feedback_loop.py", files)
        self.assertIn("config.json", files)

    def test_bare_files(self):
        task = "Write test_selfheal.py with 25 tests"
        files = extract_mentioned_files(task)
        self.assertIn("test_selfheal.py", files)


class TestExtractCreatedFiles(unittest.TestCase):
    def test_write_calls(self):
        entries = [{"type": "message", "message": {"role": "assistant", "content": [
            {"type": "toolCall", "name": "Write", "arguments": {"path": "/tmp/test.py"}}
        ]}}]
        files = extract_created_files_from_transcript(entries)
        self.assertIn("/tmp/test.py", files)


class TestValidateFileExists(unittest.TestCase):
    def test_existing_file(self):
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".py") as f:
            f.write("print('hi')")
            f.flush()
            ok, detail = validate_file_exists(f.name)
            self.assertTrue(ok)
        os.unlink(f.name)

    def test_missing_file(self):
        ok, _ = validate_file_exists("/tmp/nonexistent_xyz_abc.py")
        self.assertFalse(ok)

    def test_empty_file(self):
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
            f.flush()
            ok, detail = validate_file_exists(f.name)
            self.assertFalse(ok)
            self.assertIn("empty", detail)
        os.unlink(f.name)


class TestValidatePythonSyntax(unittest.TestCase):
    def test_valid_python(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write("def hello():\n return 42\n")
            f.flush()
            ok, _ = validate_python_syntax(f.name)
            self.assertTrue(ok)
        os.unlink(f.name)

    def test_invalid_python(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write("def hello(\n")
            f.flush()
            ok, detail = validate_python_syntax(f.name)
            self.assertFalse(ok)
            self.assertIn("Syntax error", detail)
        os.unlink(f.name)


class TestValidateTypescript(unittest.TestCase):
    def test_valid_ts(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f:
            f.write("export function hello(): string { return 'hi'; }\n")
            f.flush()
            ok, _ = validate_typescript_structure(f.name)
            self.assertTrue(ok)
        os.unlink(f.name)

    def test_excessive_any(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f:
            f.write("export " + "const x: any = 1;\n" * 15)
            f.flush()
            ok, detail = validate_typescript_structure(f.name)
            self.assertFalse(ok)
            self.assertIn("any", detail)
        os.unlink(f.name)


class TestCheckTestResults(unittest.TestCase):
    def test_pytest_pass(self):
        entries = [make_tool_result("10 passed in 0.5s")]
        entries[0]["message"]["role"] = "toolResult"
        result = check_test_results(entries)
        self.assertTrue(result["tests_found"])
        self.assertTrue(result["tests_passed"])

    def test_pytest_fail(self):
        entries = [make_tool_result("3 failed, 7 passed")]
        entries[0]["message"]["role"] = "toolResult"
        result = check_test_results(entries)
        self.assertTrue(result["tests_found"])
        self.assertFalse(result["tests_passed"])


class TestValidationReport(unittest.TestCase):
    def test_report_pass(self):
        r = ValidationReport()
        r.add("check1", "pass")
        self.assertTrue(r.ok)
        self.assertEqual(r.passed, 1)

    def test_report_fail(self):
        r = ValidationReport()
        r.add("check1", "fail", "broken")
        self.assertFalse(r.ok)

    def test_to_json(self):
        r = ValidationReport()
        r.add("check1", "pass")
        j = r.to_json()
        self.assertIn("checks", j)
        self.assertTrue(j["ok"])


class TestIntegration(unittest.TestCase):
    """Integration test with a mock session."""

    def test_full_feedback_loop(self):
        entries = [
            {"type": "session", "id": "test-123", "timestamp": "2026-02-08T10:00:00Z"},
            make_entry("assistant", "Here is the file"),
            make_entry("user", "nein, das ist falsch"),
            make_tool_call("exec"),
            make_tool_call("exec"),
            make_tool_call("exec"),
            make_tool_call("exec"),
            make_entry("assistant", "Sorry, I made an error"),
            make_tool_result("Error: permission denied", is_error=True),
        ]
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, entries)
            findings = analyze_session(parse_jsonl(Path(f.name)), DEFAULT_CONFIG, f.name)
        os.unlink(f.name)
        categories = {f.category for f in findings}
        self.assertIn("correction", categories)
        self.assertIn("retry", categories)
        self.assertIn("self_error", categories)
        self.assertIn("tool_failure", categories)

    def test_full_validation(self):
        # Create a real file to validate
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as pyf:
            pyf.write("print('hello')\n")
            pyf.flush()
            py_path = pyf.name
        entries = [
            {"type": "session", "id": "val-test", "timestamp": "2026-02-08T10:00:00Z"},
            {"type": "message", "message": {"role": "assistant", "content": [
                {"type": "toolCall", "name": "Write", "arguments": {"path": py_path}}
            ]}},
            make_tool_result("25 passed in 1.2s"),
        ]
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, entries)
            report = run_validation(Path(f.name), f"Create {py_path} with tests")
        os.unlink(f.name)
        os.unlink(py_path)
        self.assertIsInstance(report, ValidationReport)
        self.assertTrue(report.passed > 0)


class TestAppendToGrowthLog(unittest.TestCase):
    def test_dry_run(self):
        f = Finding("correction", "test trigger", (1, 2), "test.jsonl")
        text = append_to_growth_log([f], DEFAULT_CONFIG, dry_run=True)
        self.assertIn("Auto-detected", text)

    def test_empty_findings(self):
        text = append_to_growth_log([], DEFAULT_CONFIG, dry_run=True)
        self.assertEqual(text, "")


if __name__ == "__main__":
    unittest.main()