# darkplex-core/tests/test_enhancements.py

#!/usr/bin/env python3
"""
Tests for memory enhancement components.
Run: python3 -m pytest test_enhancements.py -v
  or: python3 test_enhancements.py
"""

import json
import sys
import time
import unittest
from datetime import date, timedelta
from pathlib import Path

# Ensure our modules are importable
sys.path.insert(0, str(Path(__file__).parent))

from cortex.composite_scorer import (
    SearchResult, extract_date, recency_score, source_weight,
    multi_term_confidence, score_results,
)
from cortex.intent_classifier import classify, reset_cache, IntentResult
from cortex.auto_handoff import extract_markers, format_handoff, generate_handoff


class TestCompositeScorer(unittest.TestCase):
    """Tests for composite_scorer.py

    Exercises date extraction, recency scoring, source weighting,
    multi-term confidence and the combined ``score_results`` ranking.
    """

    def test_extract_date_from_path(self):
        """A YYYY-MM-DD date in the source path is extracted."""
        r = SearchResult(text="test", source_path="memory/2026-02-07.md")
        self.assertEqual(extract_date(r), date(2026, 2, 7))

    def test_extract_date_from_content(self):
        """A YYYY-MM-DD date in the text is used when the path has none."""
        r = SearchResult(text="On 2026-01-15 we decided to...", source_path="MEMORY.md")
        self.assertEqual(extract_date(r), date(2026, 1, 15))

    def test_extract_date_none(self):
        """No date in path or text yields ``None``."""
        r = SearchResult(text="no date here", source_path="README.md")
        self.assertIsNone(extract_date(r))

    def test_recency_today(self):
        """A result dated on the reference date scores ~1.0."""
        today = date.today()
        score = recency_score(today, today)
        self.assertAlmostEqual(score, 1.0, places=2)

    def test_recency_half_life(self):
        """A result exactly one half-life old scores ~0.5."""
        today = date.today()
        half_life = 14
        old = today - timedelta(days=half_life)
        score = recency_score(old, today, half_life)
        self.assertAlmostEqual(score, 0.5, places=2)

    def test_recency_very_old(self):
        """A year-old result with a 14-day half-life scores below 0.01."""
        today = date.today()
        old = today - timedelta(days=365)
        score = recency_score(old, today, 14)
        self.assertLess(score, 0.01)

    def test_recency_no_date(self):
        """A missing date is expected to score 0.3."""
        score = recency_score(None)
        self.assertEqual(score, 0.3)

    def test_source_weight_memory_md(self):
        """A path equal to a configured key gets that key's weight."""
        w = source_weight("MEMORY.md", {"MEMORY.md": 1.0, "default": 0.4})
        self.assertEqual(w, 1.0)

    def test_source_weight_daily_note(self):
        """A path under ``memory/`` gets the ``memory/`` weight."""
        weights = {"memory/": 0.7, "default": 0.4}
        w = source_weight("memory/2026-02-07.md", weights)
        self.assertEqual(w, 0.7)

    def test_source_weight_default(self):
        """A path matching no configured key gets the ``default`` weight."""
        w = source_weight("random/file.txt", {"MEMORY.md": 1.0, "default": 0.4})
        self.assertEqual(w, 0.4)

    def test_multi_term_all_match(self):
        """All query terms present in the text gives confidence 1.0."""
        score = multi_term_confidence("gateway fix watchdog", "The gateway fix for watchdog issue")
        self.assertAlmostEqual(score, 1.0)

    def test_multi_term_partial(self):
        """One of three query terms present gives confidence ~1/3."""
        score = multi_term_confidence("gateway fix watchdog", "The gateway is running fine")
        self.assertAlmostEqual(score, 1/3, places=2)

    def test_multi_term_none(self):
        """No query term present gives confidence 0.0."""
        score = multi_term_confidence("gateway fix", "completely unrelated text")
        self.assertEqual(score, 0.0)

    def test_multi_term_empty_query(self):
        """An empty query is expected to give confidence 0.5."""
        score = multi_term_confidence("", "some text")
        self.assertEqual(score, 0.5)

    def test_score_results_ordering(self):
        """Recent high-source results should rank above old high-search-score results."""
        today = date.today()
        recent_date = today.strftime("%Y-%m-%d")
        old_date = (today - timedelta(days=90)).strftime("%Y-%m-%d")

        results = [
            SearchResult(text="old but high match", source_path=f"memory/{old_date}.md", original_score=0.95),
            SearchResult(text="recent in MEMORY", source_path="MEMORY.md", original_score=0.7,
                         metadata={"date": recent_date}),
        ]
        scored = score_results(results, query="test query", reference_date=today)
        # MEMORY.md with recent date should rank higher due to source + recency
        self.assertEqual(scored[0].source_path, "MEMORY.md")

    def test_score_results_empty(self):
        """Scoring an empty result list returns an empty list."""
        self.assertEqual(score_results([], query="test"), [])

    def test_scoring_performance(self):
        """Composite scoring should be <10ms for 50 results.

        The fastest of several runs is compared against the budget, so a
        single scheduler pause or a cold first call does not fail the test.
        """
        results = [
            SearchResult(text=f"Result {i} about gateway and NATS",
                         source_path=f"memory/2026-01-{i%28+1:02d}.md",
                         original_score=0.5 + (i % 10) / 20)
            for i in range(50)
        ]
        timings_ms = []
        for _ in range(5):
            # Fresh list per run, built outside the timed region, in case
            # score_results reorders its input in place.
            batch = list(results)
            start = time.perf_counter()
            score_results(batch, query="gateway NATS")
            timings_ms.append((time.perf_counter() - start) * 1000)
        elapsed_ms = min(timings_ms)
        self.assertLess(elapsed_ms, 10, f"Scoring took {elapsed_ms:.1f}ms, should be <10ms")


class TestIntentClassifier(unittest.TestCase):
    """Tests for intent_classifier.py

    Each test feeds a query to ``classify`` and checks the reported intent
    (WHO / WHEN / WHY / WHAT) or other fields of the returned result.
    """

    def setUp(self):
        """Call ``reset_cache()`` before every test."""
        reset_cache()

    def test_who_query_english(self):
        """An English name-plus-contact query is classified as WHO."""
        r = classify("Albert Hild contact")
        self.assertEqual(r.intent, "WHO")

    def test_who_query_german(self):
        """A German "wer ist ..." query is classified as WHO."""
        r = classify("wer ist Sebastian Baier")
        self.assertEqual(r.intent, "WHO")

    def test_when_query(self):
        """An English "when did ..." query is classified as WHEN."""
        r = classify("when did we fix the gateway")
        self.assertEqual(r.intent, "WHEN")

    def test_when_query_german(self):
        """A German "wann ..." query is classified as WHEN."""
        r = classify("wann wurde TypeDB eingerichtet")
        self.assertEqual(r.intent, "WHEN")

    def test_why_query(self):
        """An English "why did ..." query is classified as WHY."""
        r = classify("why did we choose NATS over Kafka")
        self.assertEqual(r.intent, "WHY")

    def test_why_query_german(self):
        """A German "warum ..." query is classified as WHY."""
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")

    def test_what_query(self):
        """A topic/status query is classified as WHAT."""
        r = classify("Mondo Gate regulatory status")
        self.assertEqual(r.intent, "WHAT")

    def test_empty_query(self):
        """An empty query is classified as WHAT with confidence below 0.5."""
        r = classify("")
        self.assertEqual(r.intent, "WHAT")
        self.assertLess(r.confidence, 0.5)

    def test_mixed_language(self):
        """A mixed English/German "who is ..." query is classified as WHO."""
        r = classify("who is the Ansprechpartner for Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_classification_speed(self):
        """Intent classification must be <5ms."""
        queries = [
            "Albert Hild contact", "when did we fix the gateway",
            "why NATS over Kafka", "infrastructure status",
            "wer ist bei Vainplex", "wann Viola fix",
        ]
        for q in queries:
            # subTest reports every slow query instead of stopping at the first.
            with self.subTest(query=q):
                r = classify(q)
                self.assertLess(r.classification_ms, 5.0,
                                f"Classification of '{q}' took {r.classification_ms:.2f}ms")

    def test_capitalized_names_boost_who(self):
        """A query made only of capitalized names is classified as WHO."""
        r = classify("Sebastian Baier Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_date_tokens_boost_when(self):
        """A German query with "wann" and a month name is classified as WHEN."""
        r = classify("wann war der watchdog incident im Februar")
        self.assertEqual(r.intent, "WHEN")

    def test_returns_weight_adjustments(self):
        """A WHY result exposes ``weight_adjustments`` as a dict."""
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")
        # WHY should have weight adjustments for MEMORY.md boost
        self.assertIsInstance(r.weight_adjustments, dict)


class TestAutoHandoff(unittest.TestCase):
    """Tests for auto_handoff.py

    ``SAMPLE_CONTENT`` is a small session note containing DECISION, TODO,
    BLOCKED and QUESTION markers, checkbox items and markdown headings; the
    tests run the extraction and formatting helpers against it.
    """

    SAMPLE_CONTENT = """# Session 2026-02-07

## Infrastructure Work
- Fixed gateway watchdog issue
- DECISION: Use NATS instead of Kafka for event streaming
- TODO: Set up monitoring for new NATS cluster
- BLOCKED: Waiting for DNS propagation for new domain

## Open Items
- QUESTION: Should we migrate old events to new format?
- [ ] Update documentation
- [ ] Run integration tests

DECISION: Switch Mona to Opus model for better reasoning
TODO: Benchmark Opus vs Sonnet for our workload
"""

    def test_extract_decisions(self):
        """Both DECISION lines are found, including the NATS one."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["decisions"]), 2)
        self.assertTrue(any("NATS" in d for d in result["decisions"]))

    def test_extract_todos(self):
        """At least three todo items are extracted from the sample."""
        result = extract_markers(self.SAMPLE_CONTENT)
        # The sample has 2 explicit TODO: lines and 2 checkbox items; the
        # assertion only requires 3 of those 4 candidates.
        self.assertGreaterEqual(len(result["todos"]), 3)

    def test_extract_blocked(self):
        """The BLOCKED line mentioning DNS is extracted."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["blocked"]), 1)
        self.assertTrue(any("DNS" in b for b in result["blocked"]))

    def test_extract_questions(self):
        """At least one QUESTION entry is extracted."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["questions"]), 1)

    def test_extract_headings(self):
        """At least one entry is reported under ``key_context``."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result.get("key_context", [])), 1)

    def test_format_handoff(self):
        """Formatted markdown contains the title, section labels and items."""
        extracted = {
            "decisions": ["Use NATS"],
            "todos": ["Set up monitoring"],
            "blocked": ["DNS propagation"],
            "questions": ["Migrate events?"],
        }
        md = format_handoff(extracted, title="Test Handoff")
        self.assertIn("# Test Handoff", md)
        self.assertIn("✅ Decisions", md)
        self.assertIn("Use NATS", md)
        self.assertIn("📋 Next Steps", md)

    def test_format_empty(self):
        """Formatting an empty dict still yields the default title text."""
        md = format_handoff({})
        self.assertIn("Session Handoff", md)

    def test_generate_handoff_regex(self):
        """The generated handoff mentions content from the sample markers."""
        result = generate_handoff(self.SAMPLE_CONTENT, source="test.md")
        self.assertIn("NATS", result)
        self.assertIn("DNS", result)

    def test_real_daily_note(self):
        """Test with a real daily note if available.

        The note lives under the current user's home directory, so the test
        is reported as skipped (not passed) on machines that lack it.
        """
        note_path = Path.home() / "clawd" / "memory" / "2026-02-08.md"
        if not note_path.exists():
            self.skipTest(f"real daily note not available: {note_path}")
        # Explicit encoding: do not depend on the platform's locale default.
        content = note_path.read_text(encoding="utf-8")
        result = extract_markers(content)
        # Should at least extract headings
        self.assertIsInstance(result, dict)
        self.assertIn("key_context", result)


class TestIntegration(unittest.TestCase):
    """End-to-end checks that drive ``cortex.enhanced_search`` directly.

    The module is imported inside each test rather than at file level.
    """

    def test_file_search_finds_results(self):
        """``search_files`` returns a list for a simple keyword."""
        from cortex.enhanced_search import search_files
        # Only the return type is checked here, not the number of hits.
        self.assertIsInstance(search_files("gateway"), list)

    def test_enhanced_search_pipeline(self):
        """The pipeline response has the expected top-level keys and intent."""
        from cortex.enhanced_search import enhanced_search
        response = enhanced_search("gateway", use_unified=False, max_results=5)
        for expected_key in ("query", "intent", "results", "timing"):
            self.assertIn(expected_key, response)
        intent_type = response["intent"]["type"]
        self.assertEqual(intent_type, "WHAT")

    def test_pipeline_who_query(self):
        """A person-contact query is reported with intent type WHO."""
        from cortex.enhanced_search import enhanced_search
        response = enhanced_search("Albert Hild contact", use_unified=False, max_results=5)
        intent_type = response["intent"]["type"]
        self.assertEqual(intent_type, "WHO")

    def test_pipeline_timing(self):
        """With ``use_unified=False`` the reported total time stays under 2 s."""
        from cortex.enhanced_search import enhanced_search
        response = enhanced_search("test query", use_unified=False, max_results=5)
        total_ms = response["timing"]["total_ms"]
        # Budget is in milliseconds: 2000 ms == 2 seconds.
        self.assertLess(total_ms, 2000)


# Allow running this file directly (python3 test_enhancements.py) with
# verbose per-test output, in addition to running it through pytest.
if __name__ == "__main__":
    unittest.main(verbosity=2)