#!/usr/bin/env python3
"""
Tests for memory enhancement components.

Run: python3 -m pytest test_enhancements.py -v
 or: python3 test_enhancements.py
"""

import json
import sys
import time
import unittest
from datetime import date, timedelta
from pathlib import Path

# Ensure our modules are importable
sys.path.insert(0, str(Path(__file__).parent))

from cortex.composite_scorer import (
    SearchResult,
    extract_date,
    recency_score,
    source_weight,
    multi_term_confidence,
    score_results,
)
from cortex.intent_classifier import classify, reset_cache, IntentResult
from cortex.auto_handoff import extract_markers, format_handoff, generate_handoff


class TestCompositeScorer(unittest.TestCase):
    """Tests for composite_scorer.py"""

    def test_extract_date_from_path(self):
        """A date embedded in the source path is extracted."""
        r = SearchResult(text="test", source_path="memory/2026-02-07.md")
        self.assertEqual(extract_date(r), date(2026, 2, 7))

    def test_extract_date_from_content(self):
        """A date in the text is extracted when the path carries none."""
        r = SearchResult(text="On 2026-01-15 we decided to...", source_path="MEMORY.md")
        self.assertEqual(extract_date(r), date(2026, 1, 15))

    def test_extract_date_none(self):
        """No date in path or text yields None."""
        r = SearchResult(text="no date here", source_path="README.md")
        self.assertIsNone(extract_date(r))

    def test_recency_today(self):
        """A result dated on the reference date scores ~1.0."""
        today = date.today()
        score = recency_score(today, today)
        self.assertAlmostEqual(score, 1.0, places=2)

    def test_recency_half_life(self):
        """A result exactly one half-life old scores ~0.5."""
        today = date.today()
        half_life = 14
        old = today - timedelta(days=half_life)
        score = recency_score(old, today, half_life)
        self.assertAlmostEqual(score, 0.5, places=2)

    def test_recency_very_old(self):
        """A year-old result with a 14-day half-life scores below 0.01."""
        today = date.today()
        old = today - timedelta(days=365)
        score = recency_score(old, today, 14)
        self.assertLess(score, 0.01)

    def test_recency_no_date(self):
        """A missing date scores 0.3."""
        score = recency_score(None)
        self.assertEqual(score, 0.3)

    def test_source_weight_memory_md(self):
        """A path with its own entry in the weights gets that weight."""
        w = source_weight("MEMORY.md", {"MEMORY.md": 1.0, "default": 0.4})
        self.assertEqual(w, 1.0)

    def test_source_weight_daily_note(self):
        """A path under a weighted directory key gets that key's weight."""
        weights = {"memory/": 0.7, "default": 0.4}
        w = source_weight("memory/2026-02-07.md", weights)
        self.assertEqual(w, 0.7)

    def test_source_weight_default(self):
        """A path matching no key gets the "default" weight."""
        w = source_weight("random/file.txt", {"MEMORY.md": 1.0, "default": 0.4})
        self.assertEqual(w, 0.4)

    def test_multi_term_all_match(self):
        """All query terms present in the text scores 1.0."""
        score = multi_term_confidence(
            "gateway fix watchdog", "The gateway fix for watchdog issue"
        )
        self.assertAlmostEqual(score, 1.0)

    def test_multi_term_partial(self):
        """One of three query terms present scores ~1/3."""
        score = multi_term_confidence(
            "gateway fix watchdog", "The gateway is running fine"
        )
        self.assertAlmostEqual(score, 1 / 3, places=2)

    def test_multi_term_none(self):
        """No query term present scores 0.0."""
        score = multi_term_confidence("gateway fix", "completely unrelated text")
        self.assertEqual(score, 0.0)

    def test_multi_term_empty_query(self):
        """An empty query scores 0.5."""
        score = multi_term_confidence("", "some text")
        self.assertEqual(score, 0.5)

    def test_score_results_ordering(self):
        """Recent high-source results should rank above old high-search-score results."""
        today = date.today()
        recent_date = today.strftime("%Y-%m-%d")
        old_date = (today - timedelta(days=90)).strftime("%Y-%m-%d")

        results = [
            SearchResult(
                text="old but high match",
                source_path=f"memory/{old_date}.md",
                original_score=0.95,
            ),
            SearchResult(
                text="recent in MEMORY",
                source_path="MEMORY.md",
                original_score=0.7,
                metadata={"date": recent_date},
            ),
        ]
        scored = score_results(results, query="test query", reference_date=today)
        # MEMORY.md with recent date should rank higher due to source + recency
        self.assertEqual(scored[0].source_path, "MEMORY.md")

    def test_score_results_empty(self):
        """Scoring an empty result list returns an empty list."""
        self.assertEqual(score_results([], query="test"), [])

    def test_scoring_performance(self):
        """Composite scoring should be <10ms for 50 results."""
        results = [
            SearchResult(
                text=f"Result {i} about gateway and NATS",
                source_path=f"memory/2026-01-{i%28+1:02d}.md",
                original_score=0.5 + (i % 10) / 20,
            )
            for i in range(50)
        ]
        start = time.perf_counter()
        score_results(results, query="gateway NATS")
        elapsed_ms = (time.perf_counter() - start) * 1000
        self.assertLess(
            elapsed_ms, 10, f"Scoring took {elapsed_ms:.1f}ms, should be <10ms"
        )


class TestIntentClassifier(unittest.TestCase):
    """Tests for intent_classifier.py"""

    def setUp(self):
        # Reset the classifier's cache before every test.
        reset_cache()

    def test_who_query_english(self):
        """A name plus "contact" classifies as WHO."""
        r = classify("Albert Hild contact")
        self.assertEqual(r.intent, "WHO")

    def test_who_query_german(self):
        """German "wer ist ..." classifies as WHO."""
        r = classify("wer ist Sebastian Baier")
        self.assertEqual(r.intent, "WHO")

    def test_when_query(self):
        """English "when did ..." classifies as WHEN."""
        r = classify("when did we fix the gateway")
        self.assertEqual(r.intent, "WHEN")

    def test_when_query_german(self):
        """German "wann wurde ..." classifies as WHEN."""
        r = classify("wann wurde TypeDB eingerichtet")
        self.assertEqual(r.intent, "WHEN")

    def test_why_query(self):
        """English "why did ..." classifies as WHY."""
        r = classify("why did we choose NATS over Kafka")
        self.assertEqual(r.intent, "WHY")

    def test_why_query_german(self):
        """German "warum ..." classifies as WHY."""
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")

    def test_what_query(self):
        """A topical query without question words classifies as WHAT."""
        r = classify("Mondo Gate regulatory status")
        self.assertEqual(r.intent, "WHAT")

    def test_empty_query(self):
        """An empty query classifies as WHAT with confidence below 0.5."""
        r = classify("")
        self.assertEqual(r.intent, "WHAT")
        self.assertLess(r.confidence, 0.5)

    def test_mixed_language(self):
        """A mixed English/German "who is ..." query classifies as WHO."""
        r = classify("who is the Ansprechpartner for Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_classification_speed(self):
        """Intent classification must be <5ms."""
        queries = [
            "Albert Hild contact",
            "when did we fix the gateway",
            "why NATS over Kafka",
            "infrastructure status",
            "wer ist bei Vainplex",
            "wann Viola fix",
        ]
        for q in queries:
            # subTest reports every slow query, not just the first one.
            with self.subTest(query=q):
                r = classify(q)
                self.assertLess(
                    r.classification_ms,
                    5.0,
                    f"Classification of '{q}' took {r.classification_ms:.2f}ms",
                )

    def test_capitalized_names_boost_who(self):
        """A query of capitalized names only classifies as WHO."""
        r = classify("Sebastian Baier Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_date_tokens_boost_when(self):
        """A German "wann" query mentioning a month classifies as WHEN."""
        r = classify("wann war der watchdog incident im Februar")
        self.assertEqual(r.intent, "WHEN")

    def test_returns_weight_adjustments(self):
        """A WHY result exposes its weight adjustments as a dict."""
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")
        # WHY should have weight adjustments for MEMORY.md boost
        self.assertIsInstance(r.weight_adjustments, dict)


class TestAutoHandoff(unittest.TestCase):
    """Tests for auto_handoff.py"""

    # Sample note: 2 DECISION, 2 TODO, 1 BLOCKED, 1 QUESTION markers,
    # 2 unchecked checkboxes and 3 markdown headings.
    SAMPLE_CONTENT = """# Session 2026-02-07

## Infrastructure Work
- Fixed gateway watchdog issue
- DECISION: Use NATS instead of Kafka for event streaming
- TODO: Set up monitoring for new NATS cluster
- BLOCKED: Waiting for DNS propagation for new domain

## Open Items
- QUESTION: Should we migrate old events to new format?
- [ ] Update documentation
- [ ] Run integration tests

DECISION: Switch Mona to Opus model for better reasoning
TODO: Benchmark Opus vs Sonnet for our workload
"""

    def test_extract_decisions(self):
        """At least two decisions are extracted, one mentioning NATS."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["decisions"]), 2)
        self.assertTrue(any("NATS" in d for d in result["decisions"]))

    def test_extract_todos(self):
        """At least three todos are extracted from the sample."""
        result = extract_markers(self.SAMPLE_CONTENT)
        # The sample holds 2 explicit TODO: lines and 2 checkbox items;
        # the assertion only enforces a lower bound of 3.
        self.assertGreaterEqual(len(result["todos"]), 3)

    def test_extract_blocked(self):
        """At least one blocked item is extracted, one mentioning DNS."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["blocked"]), 1)
        self.assertTrue(any("DNS" in b for b in result["blocked"]))

    def test_extract_questions(self):
        """At least one question is extracted."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["questions"]), 1)

    def test_extract_headings(self):
        """At least one entry lands in "key_context"."""
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result.get("key_context", [])), 1)

    def test_format_handoff(self):
        """Formatted output contains the title, section headers and items."""
        extracted = {
            "decisions": ["Use NATS"],
            "todos": ["Set up monitoring"],
            "blocked": ["DNS propagation"],
            "questions": ["Migrate events?"],
        }
        md = format_handoff(extracted, title="Test Handoff")
        self.assertIn("# Test Handoff", md)
        self.assertIn("✅ Decisions", md)
        self.assertIn("Use NATS", md)
        self.assertIn("📋 Next Steps", md)

    def test_format_empty(self):
        """Formatting an empty dict still yields the default title text."""
        md = format_handoff({})
        self.assertIn("Session Handoff", md)

    def test_generate_handoff_regex(self):
        """The generated handoff mentions NATS and DNS from the sample."""
        result = generate_handoff(self.SAMPLE_CONTENT, source="test.md")
        self.assertIn("NATS", result)
        self.assertIn("DNS", result)

    def test_real_daily_note(self):
        """Test with a real daily note if available."""
        note_path = Path.home() / "clawd" / "memory" / "2026-02-08.md"
        if not note_path.exists():
            # Report a skip instead of silently passing when the
            # machine-specific fixture is absent.
            self.skipTest(f"daily note not available: {note_path}")
        # Explicit encoding so the read does not depend on the locale.
        content = note_path.read_text(encoding="utf-8")
        result = extract_markers(content)
        # Should at least extract headings
        self.assertIsInstance(result, dict)
        self.assertIn("key_context", result)


class TestIntegration(unittest.TestCase):
    """Integration tests for the full pipeline."""

    def test_file_search_finds_results(self):
        """File search should find results in memory/."""
        from cortex.enhanced_search import search_files

        results = search_files("gateway")
        # Should find at least something in memory files
        self.assertIsInstance(results, list)

    def test_enhanced_search_pipeline(self):
        """Full pipeline should run without errors."""
        from cortex.enhanced_search import enhanced_search

        result = enhanced_search("gateway", use_unified=False, max_results=5)
        self.assertIn("query", result)
        self.assertIn("intent", result)
        self.assertIn("results", result)
        self.assertIn("timing", result)
        self.assertEqual(result["intent"]["type"], "WHAT")

    def test_pipeline_who_query(self):
        """The pipeline reports intent type WHO for a contact query."""
        from cortex.enhanced_search import enhanced_search

        result = enhanced_search(
            "Albert Hild contact", use_unified=False, max_results=5
        )
        self.assertEqual(result["intent"]["type"], "WHO")

    def test_pipeline_timing(self):
        """Full pipeline without unified should be fast."""
        from cortex.enhanced_search import enhanced_search

        result = enhanced_search("test query", use_unified=False, max_results=5)
        # Should complete in reasonable time (< 2 seconds for file search)
        self.assertLess(result["timing"]["total_ms"], 2000)


if __name__ == "__main__":
    unittest.main(verbosity=2)