# darkplex-core/tests/test_selfheal.py
# Claudia 43d033e242 feat: initial cortex package — 8 intelligence modules, CLI, Docker
# Modules: triage, health_scanner, feedback_loop, memory_hygiene,
#          roadmap, validate_output, enhanced_search, auto_handoff
#          + composite_scorer, intent_classifier
#
# CLI: 'cortex <module> <command>' unified entry point
# Tests: 157/169 passing (12 assertion mismatches from rename)
# Docker: python:3.11-slim based
# 2026-02-09 11:18:20 +01:00
#
# 425 lines
# 15 KiB
# Python
#!/usr/bin/env python3
"""Tests for feedback_loop.py and validate_output.py — 30+ tests."""
import json
import os
import sys
import tempfile
import unittest
from pathlib import Path
from datetime import datetime, timedelta, timezone
sys.path.insert(0, str(Path(__file__).parent))
from cortex.feedback_loop import (
parse_since, parse_jsonl, get_text_content, get_tool_name,
detect_corrections, detect_retries, detect_tool_failures,
detect_self_errors, detect_knowledge_gaps, analyze_session,
finding_to_markdown, deduplicate_findings, load_config,
get_session_timestamp, Finding, append_to_growth_log,
)
from cortex.validate_output import (
extract_mentioned_files, extract_created_files_from_transcript,
extract_transcript_errors, check_test_results, validate_file_exists,
validate_python_syntax, validate_typescript_structure, run_validation,
ValidationReport, parse_jsonl as v_parse_jsonl,
)
# Shared default configuration so every detector test exercises the stock thresholds.
DEFAULT_CONFIG = load_config()
def make_entry(role="user", text="hello", entry_type="message", **kwargs):
    """Build a minimal transcript entry with one text content item.

    Extra keyword arguments are merged into the top-level dict (e.g. a
    ``timestamp`` key).
    """
    entry = {
        "type": entry_type,
        "message": {"role": role, "content": [{"type": "text", "text": text}]},
    }
    entry.update(kwargs)
    return entry
def make_tool_call(name="exec", args=None):
    """Build an assistant message containing a single toolCall content item."""
    call_item = {"type": "toolCall", "name": name, "arguments": args or {}}
    return {
        "type": "message",
        "message": {"role": "assistant", "content": [call_item]},
    }
def make_tool_result(text="ok", is_error=False):
    """Build a toolResult message whose content is a single text item."""
    message = {
        "role": "toolResult",
        "content": [{"type": "text", "text": text}],
        "isError": is_error,
    }
    return {"type": "message", "message": message}
def write_jsonl(path, entries):
    """Serialize *entries* as JSONL (one JSON object per line), overwriting *path*."""
    with open(path, "w") as handle:
        handle.writelines(json.dumps(entry) + "\n" for entry in entries)
# === Feedback Loop Tests ===
class TestParseSince(unittest.TestCase):
    """parse_since turns relative-duration strings ("24h", "7d") into datetimes."""

    def _age_seconds(self, spec):
        # Seconds between "now" and the cutoff parse_since produced.
        return (datetime.now(timezone.utc) - parse_since(spec)).total_seconds()

    def test_hours(self):
        self.assertAlmostEqual(self._age_seconds("24h"), 86400, delta=5)

    def test_days(self):
        self.assertAlmostEqual(self._age_seconds("7d"), 604800, delta=5)

    def test_minutes(self):
        self.assertAlmostEqual(self._age_seconds("30m"), 1800, delta=5)

    def test_invalid(self):
        self.assertRaises(ValueError, parse_since, "abc")
class TestGetTextContent(unittest.TestCase):
    """get_text_content flattens an entry's message content into plain text."""

    def test_list_content(self):
        # Normal shape: a list of typed content items.
        self.assertEqual(get_text_content(make_entry(text="hello world")), "hello world")

    def test_string_content(self):
        # Content may also be a bare string.
        self.assertEqual(get_text_content({"message": {"content": "plain string"}}), "plain string")

    def test_empty(self):
        # Missing content yields an empty string rather than an error.
        self.assertEqual(get_text_content({"message": {}}), "")
class TestGetToolName(unittest.TestCase):
    """get_tool_name extracts the tool name from a toolCall entry."""

    def test_tool_call(self):
        self.assertEqual(get_tool_name(make_tool_call("read")), "read")

    def test_no_tool(self):
        # Plain text entries carry no tool call.
        self.assertIsNone(get_tool_name(make_entry()))
class TestDetectCorrections(unittest.TestCase):
    """detect_corrections flags user pushback that follows an assistant turn."""

    def _run(self, entries):
        return detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_nein(self):
        # German negation right after an assistant message.
        findings = self._run([
            make_entry("assistant", "Here is the result"),
            make_entry("user", "nein, das ist falsch"),
        ])
        self.assertGreaterEqual(len(findings), 1)
        self.assertEqual(findings[0].category, "correction")

    def test_detects_wrong(self):
        findings = self._run([
            make_entry("assistant", "Done!"),
            make_entry("user", "That's wrong"),
        ])
        self.assertGreaterEqual(len(findings), 1)

    def test_no_false_positive(self):
        # Praise must not be counted as a correction.
        findings = self._run([
            make_entry("assistant", "Here you go"),
            make_entry("user", "Thanks, great job!"),
        ])
        self.assertEqual(len(findings), 0)

    def test_requires_assistant_before(self):
        # A lone user "nein" with no preceding assistant turn is not a correction.
        self.assertEqual(len(self._run([make_entry("user", "nein")])), 0)
class TestDetectRetries(unittest.TestCase):
    """detect_retries fires when the same tool is invoked repeatedly in a row."""

    def _run(self, entries):
        return detect_retries(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_3_retries(self):
        # Four identical calls is above the retry threshold.
        findings = self._run([make_tool_call("exec") for _ in range(4)])
        self.assertEqual(len(findings), 1)
        self.assertIn("4 times", findings[0].trigger)

    def test_no_retry_under_threshold(self):
        findings = self._run([make_tool_call("exec") for _ in range(2)])
        self.assertEqual(len(findings), 0)

    def test_different_tools_no_retry(self):
        # Distinct tools in sequence are not a retry pattern.
        calls = [make_tool_call(tool) for tool in ("exec", "read", "write")]
        self.assertEqual(len(self._run(calls)), 0)
class TestDetectToolFailures(unittest.TestCase):
    """detect_tool_failures flags tool results that report errors.

    Note: make_tool_result already sets ``role="toolResult"``, so the
    post-hoc role reassignment the tests previously did (with a stale
    "toolResult role needed" comment) was redundant and has been removed.
    """

    def _run(self, entries):
        return detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_is_error(self):
        # Explicit isError flag on the tool result.
        findings = self._run([make_tool_result("something failed", is_error=True)])
        self.assertGreaterEqual(len(findings), 1)

    def test_detects_error_text(self):
        # Error-looking text without the isError flag.
        findings = self._run([make_tool_result("Error: command not found")])
        self.assertGreaterEqual(len(findings), 1)

    def test_no_error(self):
        findings = self._run([make_tool_result("All good")])
        self.assertEqual(len(findings), 0)
class TestDetectSelfErrors(unittest.TestCase):
    """detect_self_errors flags assistant apologies / admissions of mistakes."""

    def _run(self, assistant_text):
        entries = [make_entry("assistant", assistant_text)]
        return detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_sorry(self):
        self.assertEqual(len(self._run("Sorry, I made a mistake there")), 1)

    def test_detects_entschuldigung(self):
        # German apology is detected too.
        self.assertEqual(len(self._run("Entschuldigung, das war falsch")), 1)

    def test_no_apology(self):
        self.assertEqual(len(self._run("Here is your answer")), 0)
class TestDetectKnowledgeGaps(unittest.TestCase):
    """detect_knowledge_gaps pairs an assistant 'couldn't find' with a substantive user reply."""

    def _run(self, entries):
        return detect_knowledge_gaps(entries, DEFAULT_CONFIG, "test.jsonl")

    def test_detects_gap(self):
        conversation = [
            make_entry("assistant", "I couldn't find that information"),
            make_entry("user", "The answer is actually in the config file at /etc/app/config.yaml"),
        ]
        self.assertEqual(len(self._run(conversation)), 1)

    def test_no_gap_short_reply(self):
        # A terse follow-up does not supply the missing knowledge.
        conversation = [
            make_entry("assistant", "I couldn't find it"),
            make_entry("user", "ok"),
        ]
        self.assertEqual(len(self._run(conversation)), 0)
class TestFindingToMarkdown(unittest.TestCase):
    """finding_to_markdown renders a Finding as a growth-log markdown entry."""

    def test_format(self):
        finding = Finding("correction", "User said 'nein'", (10, 12), "test.jsonl")
        rendered = finding_to_markdown(finding, DEFAULT_CONFIG)
        for fragment in ("Auto-detected: User Correction", "User said 'nein'", "lines 10-12"):
            self.assertIn(fragment, rendered)
class TestDeduplicate(unittest.TestCase):
    """deduplicate_findings collapses findings that share the same trigger text."""

    def test_removes_dupes(self):
        trigger = "User said nein after agent response"
        dupes = [
            Finding("correction", trigger, (1, 2), "a.jsonl"),
            Finding("correction", trigger, (5, 6), "a.jsonl"),
        ]
        self.assertEqual(len(deduplicate_findings(dupes)), 1)
class TestParseJsonl(unittest.TestCase):
    """parse_jsonl reads one JSON object per line and skips unparsable lines."""

    def _parse_text(self, text):
        # Round-trip helper: write raw text to a temp .jsonl, parse, clean up.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            f.write(text)
            f.flush()
        try:
            return parse_jsonl(Path(f.name))
        finally:
            os.unlink(f.name)

    def test_parses_valid(self):
        self.assertEqual(len(self._parse_text('{"a":1}\n{"b":2}\n')), 2)

    def test_skips_bad_lines(self):
        # The malformed middle line is silently dropped.
        self.assertEqual(len(self._parse_text('{"a":1}\nBAD LINE\n{"b":2}\n')), 2)
# === Validator Tests ===
class TestExtractMentionedFiles(unittest.TestCase):
    """extract_mentioned_files pulls filenames out of a task description."""

    def test_backtick_files(self):
        found = extract_mentioned_files("Create `feedback_loop.py` and `config.json`")
        self.assertIn("feedback_loop.py", found)
        self.assertIn("config.json", found)

    def test_bare_files(self):
        # Filenames are picked up even without backticks.
        found = extract_mentioned_files("Write test_selfheal.py with 25 tests")
        self.assertIn("test_selfheal.py", found)
class TestExtractCreatedFiles(unittest.TestCase):
    """extract_created_files_from_transcript collects paths from Write tool calls."""

    def test_write_calls(self):
        # make_tool_call builds the same entry shape the original spelled out inline.
        entries = [make_tool_call("Write", {"path": "/tmp/test.py"})]
        self.assertIn("/tmp/test.py", extract_created_files_from_transcript(entries))
class TestValidateFileExists(unittest.TestCase):
    """validate_file_exists checks that a path exists and is non-empty."""

    def _validate_contents(self, contents, suffix=""):
        # Materialize contents in a temp file, validate it, then clean up.
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=suffix) as f:
            f.write(contents)
            f.flush()
        try:
            return validate_file_exists(f.name)
        finally:
            os.unlink(f.name)

    def test_existing_file(self):
        ok, _ = self._validate_contents("print('hi')", suffix=".py")
        self.assertTrue(ok)

    def test_missing_file(self):
        ok, _ = validate_file_exists("/tmp/nonexistent_xyz_abc.py")
        self.assertFalse(ok)

    def test_empty_file(self):
        ok, detail = self._validate_contents("")
        self.assertFalse(ok)
        self.assertIn("empty", detail)
class TestValidatePythonSyntax(unittest.TestCase):
    """validate_python_syntax compiles a .py file and reports syntax errors."""

    def _validate_code(self, code):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write(code)
            f.flush()
        try:
            return validate_python_syntax(f.name)
        finally:
            os.unlink(f.name)

    def test_valid_python(self):
        ok, _ = self._validate_code("def hello():\n return 42\n")
        self.assertTrue(ok)

    def test_invalid_python(self):
        # Unclosed parenthesis → syntax error reported in the detail string.
        ok, detail = self._validate_code("def hello(\n")
        self.assertFalse(ok)
        self.assertIn("Syntax error", detail)
class TestValidateTypescript(unittest.TestCase):
    """validate_typescript_structure applies heuristic checks to .ts files."""

    def _validate_code(self, code):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f:
            f.write(code)
            f.flush()
        try:
            return validate_typescript_structure(f.name)
        finally:
            os.unlink(f.name)

    def test_valid_ts(self):
        ok, _ = self._validate_code("export function hello(): string { return 'hi'; }\n")
        self.assertTrue(ok)

    def test_excessive_any(self):
        # Fifteen `any` annotations should trip the excessive-any heuristic.
        ok, detail = self._validate_code("export " + "const x: any = 1;\n" * 15)
        self.assertFalse(ok)
        self.assertIn("any", detail)
class TestCheckTestResults(unittest.TestCase):
    """check_test_results parses pytest summary lines out of tool results.

    Note: make_tool_result already sets ``role="toolResult"``, so the
    redundant post-hoc role reassignment the original tests performed
    has been removed.
    """

    def test_pytest_pass(self):
        result = check_test_results([make_tool_result("10 passed in 0.5s")])
        self.assertTrue(result["tests_found"])
        self.assertTrue(result["tests_passed"])

    def test_pytest_fail(self):
        # Any failed count marks the run as not passed.
        result = check_test_results([make_tool_result("3 failed, 7 passed")])
        self.assertTrue(result["tests_found"])
        self.assertFalse(result["tests_passed"])
class TestValidationReport(unittest.TestCase):
    """ValidationReport aggregates named pass/fail checks."""

    def test_report_pass(self):
        report = ValidationReport()
        report.add("check1", "pass")
        self.assertTrue(report.ok)
        self.assertEqual(report.passed, 1)

    def test_report_fail(self):
        report = ValidationReport()
        report.add("check1", "fail", "broken")
        self.assertFalse(report.ok)

    def test_to_json(self):
        report = ValidationReport()
        report.add("check1", "pass")
        payload = report.to_json()
        self.assertIn("checks", payload)
        self.assertTrue(payload["ok"])
class TestIntegration(unittest.TestCase):
    """End-to-end runs of analyze_session / run_validation on a synthetic transcript."""

    def test_full_feedback_loop(self):
        # Transcript containing one of each detectable pattern.
        transcript = [
            {"type": "session", "id": "test-123", "timestamp": "2026-02-08T10:00:00Z"},
            make_entry("assistant", "Here is the file"),
            make_entry("user", "nein, das ist falsch"),
        ]
        transcript += [make_tool_call("exec") for _ in range(4)]
        transcript += [
            make_entry("assistant", "Sorry, I made an error"),
            make_tool_result("Error: permission denied", is_error=True),
        ]
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, transcript)
            findings = analyze_session(parse_jsonl(Path(f.name)), DEFAULT_CONFIG, f.name)
        os.unlink(f.name)
        categories = {finding.category for finding in findings}
        for expected in ("correction", "retry", "self_error", "tool_failure"):
            self.assertIn(expected, categories)

    def test_full_validation(self):
        # A real on-disk .py file so the existence/syntax checks can pass.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as pyf:
            pyf.write("print('hello')\n")
            pyf.flush()
        py_path = pyf.name
        transcript = [
            {"type": "session", "id": "val-test", "timestamp": "2026-02-08T10:00:00Z"},
            make_tool_call("Write", {"path": py_path}),
            make_tool_result("25 passed in 1.2s"),
        ]
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
            write_jsonl(f.name, transcript)
            report = run_validation(Path(f.name), f"Create {py_path} with tests")
        os.unlink(f.name)
        os.unlink(py_path)
        self.assertIsInstance(report, ValidationReport)
        self.assertGreater(report.passed, 0)
class TestAppendToGrowthLog(unittest.TestCase):
    """append_to_growth_log renders findings; dry_run returns the text instead of writing."""

    def test_dry_run(self):
        finding = Finding("correction", "test trigger", (1, 2), "test.jsonl")
        rendered = append_to_growth_log([finding], DEFAULT_CONFIG, dry_run=True)
        self.assertIn("Auto-detected", rendered)

    def test_empty_findings(self):
        # No findings → nothing to render.
        self.assertEqual(append_to_growth_log([], DEFAULT_CONFIG, dry_run=True), "")
if __name__ == "__main__":
    # Allow running this module directly: `python test_selfheal.py`.
    unittest.main()