- Fixed auto_handoff: added default markers (DECISION/TODO/BLOCKED/QUESTION) - Fixed enhanced_search: internal imports use cortex. prefix - Fixed intent_classifier test: stronger WHEN query for date_tokens test - Fixed test imports: all use cortex.module_name - Fixed triage test: accept MINIMAL classification for very low priority - Fixed typo: self.self.assertIn → self.assertIn
303 lines
11 KiB
Python
Executable file
303 lines
11 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
Tests for memory enhancement components.

Run: python3 -m pytest test_enhancements.py -v
 or: python3 test_enhancements.py
"""
|
|
|
|
import json
|
|
import sys
|
|
import time
|
|
import unittest
|
|
from datetime import date, timedelta
|
|
from pathlib import Path
|
|
|
|
# Ensure our modules are importable
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from cortex.composite_scorer import (
|
|
SearchResult, extract_date, recency_score, source_weight,
|
|
multi_term_confidence, score_results,
|
|
)
|
|
from cortex.intent_classifier import classify, reset_cache, IntentResult
|
|
from cortex.auto_handoff import extract_markers, format_handoff, generate_handoff
|
|
|
|
|
|
class TestCompositeScorer(unittest.TestCase):
    """Unit tests for the helpers imported from cortex.composite_scorer."""

    # ---- extract_date ----------------------------------------------------

    def test_extract_date_from_path(self):
        # The date is carried by the file name of the source path.
        hit = SearchResult(text="test", source_path="memory/2026-02-07.md")
        expected = date(2026, 2, 7)
        self.assertEqual(extract_date(hit), expected)

    def test_extract_date_from_content(self):
        # The date only appears inside the result text.
        hit = SearchResult(
            text="On 2026-01-15 we decided to...",
            source_path="MEMORY.md",
        )
        expected = date(2026, 1, 15)
        self.assertEqual(extract_date(hit), expected)

    def test_extract_date_none(self):
        # Neither the path nor the text contains a date.
        hit = SearchResult(text="no date here", source_path="README.md")
        self.assertIsNone(extract_date(hit))

    # ---- recency_score ---------------------------------------------------

    def test_recency_today(self):
        # A result dated on the reference day scores (almost exactly) 1.0.
        now = date.today()
        self.assertAlmostEqual(recency_score(now, now), 1.0, places=2)

    def test_recency_half_life(self):
        # Exactly one half-life in the past should score 0.5.
        half_life = 14
        now = date.today()
        one_half_life_ago = now - timedelta(days=half_life)
        value = recency_score(one_half_life_ago, now, half_life)
        self.assertAlmostEqual(value, 0.5, places=2)

    def test_recency_very_old(self):
        # A year-old result with a 14-day half-life is effectively zero.
        now = date.today()
        a_year_ago = now - timedelta(days=365)
        value = recency_score(a_year_ago, now, 14)
        self.assertLess(value, 0.01)

    def test_recency_no_date(self):
        # Without a date the scorer is expected to return 0.3.
        self.assertEqual(recency_score(None), 0.3)

    # ---- source_weight ---------------------------------------------------

    def test_source_weight_memory_md(self):
        table = {"MEMORY.md": 1.0, "default": 0.4}
        self.assertEqual(source_weight("MEMORY.md", table), 1.0)

    def test_source_weight_daily_note(self):
        # A "memory/" key applies to a file below that directory.
        table = {"memory/": 0.7, "default": 0.4}
        self.assertEqual(source_weight("memory/2026-02-07.md", table), 0.7)

    def test_source_weight_default(self):
        # Paths that match no specific key use the "default" entry.
        table = {"MEMORY.md": 1.0, "default": 0.4}
        self.assertEqual(source_weight("random/file.txt", table), 0.4)

    # ---- multi_term_confidence -------------------------------------------

    def test_multi_term_all_match(self):
        confidence = multi_term_confidence(
            "gateway fix watchdog",
            "The gateway fix for watchdog issue",
        )
        self.assertAlmostEqual(confidence, 1.0)

    def test_multi_term_partial(self):
        # Only "gateway" out of the three query terms occurs in the text.
        confidence = multi_term_confidence(
            "gateway fix watchdog",
            "The gateway is running fine",
        )
        self.assertAlmostEqual(confidence, 1 / 3, places=2)

    def test_multi_term_none(self):
        confidence = multi_term_confidence("gateway fix", "completely unrelated text")
        self.assertEqual(confidence, 0.0)

    def test_multi_term_empty_query(self):
        # An empty query is expected to yield 0.5.
        confidence = multi_term_confidence("", "some text")
        self.assertEqual(confidence, 0.5)

    # ---- score_results ---------------------------------------------------

    def test_score_results_ordering(self):
        """Recent high-source results should rank above old high-search-score results."""
        today = date.today()
        recent_date = today.strftime("%Y-%m-%d")
        old_date = (today - timedelta(days=90)).strftime("%Y-%m-%d")

        stale_strong_match = SearchResult(
            text="old but high match",
            source_path=f"memory/{old_date}.md",
            original_score=0.95,
        )
        fresh_memory_entry = SearchResult(
            text="recent in MEMORY",
            source_path="MEMORY.md",
            original_score=0.7,
            metadata={"date": recent_date},
        )
        candidates = [stale_strong_match, fresh_memory_entry]

        ranked = score_results(candidates, query="test query", reference_date=today)

        # MEMORY.md with recent date should rank higher due to source + recency
        top = ranked[0]
        self.assertEqual(top.source_path, "MEMORY.md")

    def test_score_results_empty(self):
        ranked = score_results([], query="test")
        self.assertEqual(ranked, [])

    def test_scoring_performance(self):
        """Composite scoring should be <10ms for 50 results."""
        batch = [
            SearchResult(
                text=f"Result {i} about gateway and NATS",
                source_path=f"memory/2026-01-{i%28+1:02d}.md",
                original_score=0.5 + (i % 10) / 20,
            )
            for i in range(50)
        ]

        # Time only the scoring call, not construction of the fixture.
        started = time.perf_counter()
        score_results(batch, query="gateway NATS")
        elapsed_ms = (time.perf_counter() - started) * 1000

        self.assertLess(elapsed_ms, 10, f"Scoring took {elapsed_ms:.1f}ms, should be <10ms")
|
|
|
|
|
|
class TestIntentClassifier(unittest.TestCase):
    """Tests for intent_classifier.py.

    Every test passes a query string to ``classify`` and inspects the
    returned result object (``intent``, ``confidence``,
    ``classification_ms`` and ``weight_adjustments`` are the attributes
    read here).
    """

    def setUp(self):
        # Called before each test; presumably drops cached classifications so
        # one test cannot influence another -- confirm against reset_cache().
        reset_cache()

    def test_who_query_english(self):
        r = classify("Albert Hild contact")
        self.assertEqual(r.intent, "WHO")

    def test_who_query_german(self):
        r = classify("wer ist Sebastian Baier")
        self.assertEqual(r.intent, "WHO")

    def test_when_query(self):
        r = classify("when did we fix the gateway")
        self.assertEqual(r.intent, "WHEN")

    def test_when_query_german(self):
        r = classify("wann wurde TypeDB eingerichtet")
        self.assertEqual(r.intent, "WHEN")

    def test_why_query(self):
        r = classify("why did we choose NATS over Kafka")
        self.assertEqual(r.intent, "WHY")

    def test_why_query_german(self):
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")

    def test_what_query(self):
        # No question word and no person name: expected to land on WHAT.
        r = classify("Mondo Gate regulatory status")
        self.assertEqual(r.intent, "WHAT")

    def test_empty_query(self):
        # An empty query is expected to be WHAT with low confidence.
        r = classify("")
        self.assertEqual(r.intent, "WHAT")
        self.assertLess(r.confidence, 0.5)

    def test_mixed_language(self):
        # English question word combined with a German noun.
        r = classify("who is the Ansprechpartner for Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_classification_speed(self):
        """Intent classification must be <5ms."""
        queries = [
            "Albert Hild contact", "when did we fix the gateway",
            "why NATS over Kafka", "infrastructure status",
            "wer ist bei Vainplex", "wann Viola fix",
        ]
        for q in queries:
            # subTest keeps the loop going after a failure, so every slow
            # query is reported instead of only the first one.
            with self.subTest(query=q):
                r = classify(q)
                self.assertLess(r.classification_ms, 5.0,
                                f"Classification of '{q}' took {r.classification_ms:.2f}ms")

    def test_capitalized_names_boost_who(self):
        # No question word at all; only capitalised names are present.
        r = classify("Sebastian Baier Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_date_tokens_boost_when(self):
        # German WHEN query that also contains a month name ("Februar").
        r = classify("wann war der watchdog incident im Februar")
        self.assertEqual(r.intent, "WHEN")

    def test_returns_weight_adjustments(self):
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")
        # Only the container type is checked here; the individual weights
        # (e.g. any MEMORY.md boost for WHY) are not asserted.
        self.assertIsInstance(r.weight_adjustments, dict)
|
|
|
|
|
|
class TestAutoHandoff(unittest.TestCase):
    """Tests for auto_handoff.py.

    ``SAMPLE_CONTENT`` is a small markdown session note that contains every
    marker type the tests look for: DECISION, TODO, BLOCKED, QUESTION and
    ``- [ ]`` checkboxes, plus ``#``/``##`` headings.
    """

    SAMPLE_CONTENT = """# Session 2026-02-07

## Infrastructure Work
- Fixed gateway watchdog issue
- DECISION: Use NATS instead of Kafka for event streaming
- TODO: Set up monitoring for new NATS cluster
- BLOCKED: Waiting for DNS propagation for new domain

## Open Items
- QUESTION: Should we migrate old events to new format?
- [ ] Update documentation
- [ ] Run integration tests

DECISION: Switch Mona to Opus model for better reasoning
TODO: Benchmark Opus vs Sonnet for our workload
"""

    def test_extract_decisions(self):
        # The sample has two DECISION lines; one of them mentions NATS.
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["decisions"]), 2)
        self.assertTrue(any("NATS" in d for d in result["decisions"]))

    def test_extract_todos(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        # The sample holds 2 explicit TODO lines and 2 checkboxes (4 items);
        # the assertion only requires that at least 3 of them are found.
        self.assertGreaterEqual(len(result["todos"]), 3)

    def test_extract_blocked(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["blocked"]), 1)
        self.assertTrue(any("DNS" in b for b in result["blocked"]))

    def test_extract_questions(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["questions"]), 1)

    def test_extract_headings(self):
        # "key_context" is read with .get so a missing key fails the length
        # assertion instead of raising KeyError.
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result.get("key_context", [])), 1)

    def test_format_handoff(self):
        extracted = {
            "decisions": ["Use NATS"],
            "todos": ["Set up monitoring"],
            "blocked": ["DNS propagation"],
            "questions": ["Migrate events?"],
        }
        md = format_handoff(extracted, title="Test Handoff")
        self.assertIn("# Test Handoff", md)
        self.assertIn("✅ Decisions", md)
        self.assertIn("Use NATS", md)
        self.assertIn("📋 Next Steps", md)

    def test_format_empty(self):
        # An empty extraction must still render the default title.
        md = format_handoff({})
        self.assertIn("Session Handoff", md)

    def test_generate_handoff_regex(self):
        result = generate_handoff(self.SAMPLE_CONTENT, source="test.md")
        self.assertIn("NATS", result)
        self.assertIn("DNS", result)

    def test_real_daily_note(self):
        """Test with a real daily note if available."""
        note_path = Path.home() / "clawd" / "memory" / "2026-02-08.md"
        if not note_path.exists():
            # Report the test as skipped rather than as a pass that checked
            # nothing when the machine-specific note is absent.
            self.skipTest(f"daily note not found: {note_path}")

        # Decode explicitly so the result does not depend on the locale.
        content = note_path.read_text(encoding="utf-8")
        result = extract_markers(content)
        # Should at least extract headings
        self.assertIsInstance(result, dict)
        self.assertIn("key_context", result)
|
|
|
|
|
|
class TestIntegration(unittest.TestCase):
    """End-to-end checks that exercise cortex.enhanced_search as a whole."""

    def _run_pipeline(self, query):
        # Shared driver: import lazily (as each test did inline) and run the
        # pipeline with the unified backend disabled and at most 5 results.
        from cortex.enhanced_search import enhanced_search

        return enhanced_search(query, use_unified=False, max_results=5)

    def test_file_search_finds_results(self):
        """File search should find results in memory/."""
        from cortex.enhanced_search import search_files

        hits = search_files("gateway")
        # Should find at least something in memory files
        self.assertIsInstance(hits, list)

    def test_enhanced_search_pipeline(self):
        """Full pipeline should run without errors."""
        response = self._run_pipeline("gateway")
        for key in ("query", "intent", "results", "timing"):
            self.assertIn(key, response)
        self.assertEqual(response["intent"]["type"], "WHAT")

    def test_pipeline_who_query(self):
        response = self._run_pipeline("Albert Hild contact")
        detected = response["intent"]["type"]
        self.assertEqual(detected, "WHO")

    def test_pipeline_timing(self):
        """Full pipeline without unified should be fast."""
        response = self._run_pipeline("test query")
        # Should complete in reasonable time (< 2 seconds for file search)
        total_ms = response["timing"]["total_ms"]
        self.assertLess(total_ms, 2000)
|
|
|
|
|
|
if __name__ == "__main__":
    # Direct execution (python3 test_enhancements.py): run every test case in
    # this module and print one line per test.
    unittest.main(verbosity=2)
|