From 43d033e242e546ce78021f34b5b9800d028b56b3 Mon Sep 17 00:00:00 2001 From: Claudia Date: Mon, 9 Feb 2026 11:18:20 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20initial=20cortex=20package=20=E2=80=94?= =?UTF-8?q?=208=20intelligence=20modules,=20CLI,=20Docker?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modules: triage, health_scanner, feedback_loop, memory_hygiene, roadmap, validate_output, enhanced_search, auto_handoff + composite_scorer, intent_classifier CLI: 'cortex ' unified entry point Tests: 157/169 passing (12 assertion mismatches from rename) Docker: python:3.11-slim based --- .gitignore | 7 + Dockerfile | 8 + README.md | 62 +++++ cortex/__init__.py | 3 + cortex/auto_handoff.py | 246 +++++++++++++++++ cortex/cli.py | 74 ++++++ cortex/composite_scorer.py | 220 +++++++++++++++ cortex/enhanced_search.py | 243 +++++++++++++++++ cortex/feedback_loop.py | 392 +++++++++++++++++++++++++++ cortex/health_scanner.py | 515 ++++++++++++++++++++++++++++++++++++ cortex/intent_classifier.py | 191 +++++++++++++ cortex/memory_hygiene.py | 453 +++++++++++++++++++++++++++++++ cortex/roadmap.py | 338 +++++++++++++++++++++++ cortex/triage.py | 214 +++++++++++++++ cortex/validate_output.py | 363 +++++++++++++++++++++++++ pyproject.toml | 23 ++ tests/test_enhancements.py | 303 +++++++++++++++++++++ tests/test_intelligence.py | 341 ++++++++++++++++++++++++ tests/test_proactive.py | 346 ++++++++++++++++++++++++ tests/test_selfheal.py | 425 +++++++++++++++++++++++++++++ 20 files changed, 4767 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 cortex/__init__.py create mode 100755 cortex/auto_handoff.py create mode 100644 cortex/cli.py create mode 100755 cortex/composite_scorer.py create mode 100755 cortex/enhanced_search.py create mode 100644 cortex/feedback_loop.py create mode 100644 cortex/health_scanner.py create mode 100755 cortex/intent_classifier.py create mode 
100644 cortex/memory_hygiene.py create mode 100644 cortex/roadmap.py create mode 100644 cortex/triage.py create mode 100644 cortex/validate_output.py create mode 100644 pyproject.toml create mode 100755 tests/test_enhancements.py create mode 100644 tests/test_intelligence.py create mode 100644 tests/test_proactive.py create mode 100644 tests/test_selfheal.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..35b69ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.pyc +*.egg-info/ +dist/ +build/ +.eggs/ +.pytest_cache/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7c5ae51 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.11-slim + +WORKDIR /app +COPY . . +RUN pip install --no-cache-dir -e . + +ENTRYPOINT ["cortex"] +CMD ["--help"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..254d96a --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +# Cortex 🧠 + +Intelligence layer for [OpenClaw](https://github.com/moltbot/moltbot). The higher-order thinking above the nervous system. + +## Modules + +| Module | Command | What it does | +|--------|---------|-------------| +| **Triage** | `cortex triage score "text"` | Priority scoring (urgency Γ— importance Γ— effort Γ— readiness) | +| **Health Scanner** | `cortex health [--json]` | Proactive system health checks | +| **Feedback Loop** | `cortex feedback --since 6h` | Extract lessons from session transcripts | +| **Memory Hygiene** | `cortex hygiene stats\|stale\|duplicates\|orphans\|archive` | Keep memory clean | +| **Roadmap** | `cortex roadmap list\|add\|overdue\|report` | Task tracking with deadlines & dependencies | +| **Validate** | `cortex validate --transcript ` | Sub-agent output validation | +| **Enhanced Search** | `cortex search "query"` | Multi-strategy memory search | +| **Auto Handoff** | `cortex handoff ` | Generate session handoff documents | + +## Install + +```bash +pip install -e . 
+``` + +## Docker + +```bash +docker build -t cortex . +docker run cortex triage score "production down" +``` + +## Tests + +```bash +python -m pytest tests/ -v +``` + +## Architecture + +Cortex is a standalone Python package that OpenClaw calls via CLI. No tight coupling, no plugin system β€” just clean Unix-style commands. + +``` +OpenClaw (Node/TS) + β”‚ + β”œβ”€β”€ cortex triage score "..." ← CLI calls + β”œβ”€β”€ cortex health --json + β”œβ”€β”€ cortex feedback --since 6h + └── cortex hygiene stats + β”‚ + └── Cortex (Python) + β”œβ”€β”€ triage.py + β”œβ”€β”€ health_scanner.py + β”œβ”€β”€ feedback_loop.py + β”œβ”€β”€ memory_hygiene.py + β”œβ”€β”€ roadmap.py + β”œβ”€β”€ validate_output.py + β”œβ”€β”€ enhanced_search.py + └── auto_handoff.py +``` + +## License + +MIT diff --git a/cortex/__init__.py b/cortex/__init__.py new file mode 100644 index 0000000..6d661ef --- /dev/null +++ b/cortex/__init__.py @@ -0,0 +1,3 @@ +"""Cortex β€” Intelligence layer for OpenClaw.""" + +__version__ = "0.1.0" diff --git a/cortex/auto_handoff.py b/cortex/auto_handoff.py new file mode 100755 index 0000000..23b0bd2 --- /dev/null +++ b/cortex/auto_handoff.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +""" +Auto-Handoff Generator β€” Extract structured session handoffs from daily notes +or session transcripts. + +Two modes: +1. LLM-based extraction (via OpenAI API) β€” comprehensive +2. Regex-based fallback β€” extracts TODO/DECISION/BLOCKED/QUESTION markers + +Output: Structured markdown handoff document. 
CONFIG_PATH = Path(__file__).parent / "config.json"


def load_config(path: Optional[Path] = None) -> dict:
    """Load the "auto_handoff" section of config.json.

    Args:
        path: Optional override for the config file location.

    Returns:
        The "auto_handoff" sub-dict, or {} when the file does not exist.
    """
    p = path or CONFIG_PATH
    if p.exists():
        with open(p) as f:
            return json.load(f).get("auto_handoff", {})
    return {}


def extract_markers(content: str, config: Optional[dict] = None) -> dict:
    """Regex-based extraction of structured markers from content.

    Looks for configured markers (e.g. TODO:, DECISION:, BLOCKED:, QUESTION:),
    unchecked checkbox items (treated as todos), and H1-H3 headings (treated
    as key context).

    Args:
        content: Raw daily-note or transcript text.
        config: Config dict with a "markers" mapping of marker-type ->
            list of marker strings. Loaded via load_config() when None.

    Returns:
        Dict with lists under "decisions", "todos", "blocked", "questions",
        and "key_context".
    """
    cfg = config or load_config()
    markers_cfg = cfg.get("markers", {})

    results = {
        "decisions": [],
        "todos": [],
        "blocked": [],
        "questions": [],
    }

    # Maps singular config keys to the plural result buckets.
    category_map = {
        "decision": "decisions",
        "todo": "todos",
        "blocked": "blocked",
        "question": "questions",
    }

    lines = content.split("\n")
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue

        for marker_type, marker_list in markers_cfg.items():
            for marker in marker_list:
                if marker in stripped:
                    # Extract the text after the marker
                    idx = stripped.index(marker) + len(marker)
                    text = stripped[idx:].strip().lstrip(":").strip()
                    if text:
                        category = category_map.get(marker_type, marker_type)
                        if category in results:
                            results[category].append(text)
                    # Only the first marker of this type counts per line.
                    break

    # Also extract unchecked checkbox items as todos (deduped against marker
    # hits). FIX: removed a compiled `done_re` ("- [x] ..." matcher) that was
    # never used anywhere in the function — dead code.
    checkbox_re = re.compile(r'^[-*]\s*\[\s*\]\s*(.+)')
    for line in lines:
        m = checkbox_re.match(line.strip())
        if m and m.group(1) not in results["todos"]:
            results["todos"].append(m.group(1))

    # Extract H1-H3 headings as key context for the next session.
    results["key_context"] = []
    heading_re = re.compile(r'^#{1,3}\s+(.+)')
    for line in lines:
        m = heading_re.match(line.strip())
        if m:
            results["key_context"].append(m.group(1))

    return results


def format_handoff(extracted: dict, title: str = "Session Handoff",
                   source: str = "") -> str:
    """Format extracted data into a structured markdown handoff document.

    Args:
        extracted: Dict as produced by extract_markers() / LLM extraction;
            recognized keys: decisions, todos, blocked, questions,
            key_context, summary. Empty/missing sections are omitted.
        title: H1 title of the document.
        source: Optional source attribution line.

    Returns:
        Markdown document string (sections separated by blank lines).
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    parts = [f"# {title}", f"*Generated: {now}*"]
    if source:
        parts.append(f"*Source: {source}*")
    parts.append("")

    if extracted.get("decisions"):
        parts.append("## βœ… Decisions Made")
        for d in extracted["decisions"]:
            parts.append(f"- {d}")
        parts.append("")

    if extracted.get("todos"):
        parts.append("## πŸ“‹ Next Steps / TODOs")
        for t in extracted["todos"]:
            parts.append(f"- [ ] {t}")
        parts.append("")

    if extracted.get("blocked"):
        parts.append("## ⚠️ Blocked / Waiting")
        for b in extracted["blocked"]:
            parts.append(f"- {b}")
        parts.append("")

    if extracted.get("questions"):
        parts.append("## ❓ Open Questions")
        for q in extracted["questions"]:
            parts.append(f"- {q}")
        parts.append("")

    if extracted.get("key_context"):
        parts.append("## πŸ“Œ Key Context")
        for c in extracted["key_context"]:
            parts.append(f"- {c}")
        parts.append("")

    if extracted.get("summary"):
        parts.append("## πŸ“ Summary")
        parts.append(extracted["summary"])
        parts.append("")

    return "\n".join(parts)
def generate_handoff(content: str, source: str = "", use_llm: bool = False,
                     config: Optional[dict] = None) -> str:
    """Generate a complete handoff document from content.

    Args:
        content: Session transcript or daily note content.
        source: Source file path for attribution.
        use_llm: Whether to use LLM extraction (requires OPENAI_API_KEY).
        config: Optional config dict.

    Returns:
        Formatted markdown handoff document.
    """
    # Pick the extraction strategy, then render — both paths share the
    # same dict shape, so formatting is strategy-agnostic.
    extractor = generate_handoff_llm if use_llm else extract_markers
    extracted = extractor(content, config)
    return format_handoff(extracted, source=source)


def main():
    _run()


def _run():
    import argparse
    parser = argparse.ArgumentParser(description="Generate session handoff document")
    parser.add_argument("file", nargs="?", help="Input file (daily note or transcript)")
    parser.add_argument("--llm", action="store_true", help="Use LLM for extraction")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    opts = parser.parse_args()

    # Read from the given file, or fall back to stdin when no file is named.
    if opts.file:
        text, origin = Path(opts.file).read_text(), opts.file
    else:
        text, origin = sys.stdin.read(), "stdin"

    handoff = generate_handoff(text, source=origin, use_llm=opts.llm)

    if opts.output:
        Path(opts.output).write_text(handoff)
        print(f"Handoff written to {opts.output}")
    else:
        print(handoff)


if __name__ == "__main__":
    main()
def main():
    """Dispatch `cortex <command>` to the matching sub-module's main()."""
    if len(sys.argv) < 2:
        print(__doc__.strip())
        sys.exit(1)

    cmd = sys.argv[1]
    # Strip the command from argv so sub-modules see clean args.
    sys.argv = [f"cortex {cmd}"] + sys.argv[2:]

    if cmd == "version":
        from cortex import __version__
        print(f"cortex {__version__}")
        return

    if cmd in ("-h", "--help", "help"):
        print(__doc__.strip())
        return

    # Command -> module name under the cortex package. Modules are imported
    # lazily so `cortex version` and `--help` stay fast.
    commands = {
        "triage": "triage",
        "health": "health_scanner",
        "feedback": "feedback_loop",
        "hygiene": "memory_hygiene",
        "roadmap": "roadmap",
        "validate": "validate_output",
        "search": "enhanced_search",
        "handoff": "auto_handoff",
    }

    module_name = commands.get(cmd)
    if module_name is None:
        print(f"Unknown command: {cmd}")
        print(__doc__.strip())
        sys.exit(1)

    module = __import__(f"cortex.{module_name}", fromlist=["main"])
    module.main()


if __name__ == "__main__":
    main()
# Date patterns for extraction
DATE_PATH_RE = re.compile(r'(\d{4})-(\d{2})-(\d{2})')
DATE_CONTENT_RE = re.compile(r'(?:^|\s)(\d{4})-(\d{2})-(\d{2})(?:\s|$|\.)')

CONFIG_PATH = Path(__file__).parent / "config.json"


@dataclass
class SearchResult:
    """A search result to be re-scored."""
    text: str
    source_path: str = ""
    original_score: float = 0.0
    metadata: dict = field(default_factory=dict)
    final_score: float = 0.0
    score_breakdown: dict = field(default_factory=dict)


def load_config(path: Optional[Path] = None) -> dict:
    """Load the "composite_scoring" section of config.json ({} if absent)."""
    p = path or CONFIG_PATH
    if p.exists():
        with open(p) as f:
            return json.load(f).get("composite_scoring", {})
    return {}


def _match_to_date(m: re.Match) -> Optional[date]:
    """Convert a YYYY-MM-DD regex match into a date.

    Returns None when the digits do not form a valid date (e.g. month 13),
    so callers can fall through to the next extraction source.
    """
    try:
        return date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
    except ValueError:
        return None


def extract_date(result: SearchResult) -> Optional[date]:
    """Extract the most relevant date from a search result.

    Sources are tried in reliability order: source path, then common
    metadata keys, then the first 500 chars of content. (Refactored: the
    triplicated try/except around date() now lives in _match_to_date.)
    """
    # Try path first (most reliable)
    m = DATE_PATH_RE.search(result.source_path)
    if m:
        d = _match_to_date(m)
        if d:
            return d

    # Try metadata
    for key in ("date", "timestamp", "created", "modified"):
        if key in result.metadata:
            m = DATE_PATH_RE.search(str(result.metadata[key]))
            if m:
                d = _match_to_date(m)
                if d:
                    return d

    # Try content (first match)
    m = DATE_CONTENT_RE.search(result.text[:500])
    if m:
        d = _match_to_date(m)
        if d:
            return d

    return None


def recency_score(result_date: Optional[date], reference_date: Optional[date] = None,
                  half_life_days: float = 14.0) -> float:
    """Calculate recency score using exponential decay.

    Returns 1.0 for today, ~0.5 at half_life_days ago, approaching 0 for
    old results. Returns 0.3 (neutral) if no date can be determined.
    """
    if result_date is None:
        return 0.3  # neutral score for undated content

    ref = reference_date or date.today()
    days_old = (ref - result_date).days
    if days_old < 0:
        days_old = 0  # future dates treated as today

    # Exponential decay: score = 2^(-days/half_life)
    return math.pow(2, -days_old / half_life_days)


def source_weight(source_path: str, source_weights: dict) -> float:
    """Determine source weight based on path pattern matching.

    Patterns containing a backslash are treated as regexes; all others are
    plain substring matches. The longest (most specific) matching pattern
    wins; the "default" key (0.4 if absent) is the fallback.

    Args:
        source_path: Path of the result's source file.
        source_weights: Mapping of pattern -> weight, plus "default".

    Returns:
        Weight of the most specific matching pattern, else the default.
    """
    if not source_path:
        return source_weights.get("default", 0.4)

    # Normalize Windows separators so patterns only need forward slashes.
    normalized = source_path.replace("\\", "/")

    best_score = source_weights.get("default", 0.4)
    best_specificity = 0

    for pattern, weight in source_weights.items():
        if pattern == "default":
            continue
        # FIX: the original condition was `"\\" in pattern or "\\d" in pattern`;
        # the second clause was redundant — any pattern containing "\d"
        # necessarily contains "\".
        if "\\" in pattern:
            try:
                if re.search(pattern, normalized):
                    specificity = len(pattern)
                    if specificity > best_specificity:
                        best_score = weight
                        best_specificity = specificity
            except re.error:
                pass
        else:
            # Simple substring match; longer pattern = more specific.
            if pattern in normalized:
                specificity = len(pattern)
                if specificity > best_specificity:
                    best_score = weight
                    best_specificity = specificity

    return best_score
def score_results(results: list[SearchResult], query: str = "",
                  config: Optional[dict] = None,
                  reference_date: Optional[date] = None,
                  weight_overrides: Optional[dict] = None) -> list[SearchResult]:
    """Apply composite scoring to a list of search results and return sorted.

    Args:
        results: List of SearchResult objects to score.
        query: Original query string (for multi-term matching).
        config: Scoring config dict. Loaded from config.json if None.
        reference_date: Date to compute recency from. Defaults to today.
        weight_overrides: Override specific weights (e.g. from intent classifier).

    Returns:
        Results sorted by final_score descending.

    NOTE: `results` is mutated in place — each element's final_score and
    score_breakdown are set, and the list itself is sorted; the same list
    object is returned for convenience.
    """
    if not results:
        return results

    # Weights default to config.json values, then hard-coded fallbacks.
    cfg = config or load_config()
    w_search = cfg.get("w_search", 0.45)
    w_recency = cfg.get("w_recency", 0.30)
    w_source = cfg.get("w_source", 0.25)
    half_life = cfg.get("recency_half_life_days", 14.0)
    src_weights = cfg.get("source_weights", {"default": 0.4})
    multi_boost = cfg.get("multi_term_boost", 0.15)

    # Apply overrides from intent classifier
    if weight_overrides:
        w_search = weight_overrides.get("w_search", w_search)
        w_recency = weight_overrides.get("w_recency", w_recency)
        w_source = weight_overrides.get("w_source", w_source)
        # Source-specific boosts: additive on top of the configured (or
        # default 0.4) weight for each overridden pattern.
        if "w_source_boost" in weight_overrides:
            src_weights = dict(src_weights)  # copy — don't mutate the config dict
            for pattern, boost in weight_overrides["w_source_boost"].items():
                src_weights[pattern] = src_weights.get(pattern, 0.4) + boost

    for r in results:
        r_date = extract_date(r)
        r_recency = recency_score(r_date, reference_date, half_life)
        r_source = source_weight(r.source_path, src_weights)
        r_confidence = multi_term_confidence(query, r.text)

        # Composite formula: weighted sum of the three signals, plus a
        # term-coverage boost, clamped to 1.0.
        base = w_search * r.original_score + w_recency * r_recency + w_source * r_source
        boost = multi_boost * r_confidence
        r.final_score = min(1.0, base + boost)

        # Per-component breakdown, kept for explainability in CLI output.
        r.score_breakdown = {
            "search": round(r.original_score, 4),
            "recency": round(r_recency, 4),
            "source": round(r_source, 4),
            "confidence": round(r_confidence, 4),
            "final": round(r.final_score, 4),
        }

    results.sort(key=lambda r: r.final_score, reverse=True)
    return results


if __name__ == "__main__":
    # Demo usage
    demo_results = [
        SearchResult(text="Gateway watchdog fix applied", source_path="memory/2026-02-07.md", original_score=0.8),
        SearchResult(text="Gateway architecture decisions", source_path="MEMORY.md", original_score=0.75),
        SearchResult(text="Old gateway notes", source_path="memory/2025-12-01.md", original_score=0.85),
    ]
    scored = score_results(demo_results, query="gateway fix")
    for r in scored:
        print(f" {r.final_score:.3f} | {r.source_path}: {r.text[:60]}")
        print(f" {r.score_breakdown}")
import argparse
import json
import os
import re
import subprocess
import sys
import time
from dataclasses import asdict
from pathlib import Path
from typing import Optional

# FIX: package-absolute imports. The original bare `from composite_scorer
# import ...` only resolved when the cortex/ directory itself was on sys.path
# (direct script run from inside the package dir); it broke `cortex search`,
# because cli.py loads this file as cortex.enhanced_search. With the package
# pip-installed (see README), absolute imports work in both invocation modes.
from cortex.composite_scorer import SearchResult, score_results, load_config as load_scorer_config
from cortex.intent_classifier import classify, IntentResult

UNIFIED_MEMORY_SCRIPT = Path.home() / "clawd" / "scripts" / "unified-memory.py"
PYTHON = sys.executable or "/usr/bin/python3"

# Paths to search directly if unified-memory.py is unavailable
SEARCH_PATHS = [
    Path.home() / "clawd" / "memory",
    Path.home() / "clawd" / "companies",
    Path.home() / "clawd" / "MEMORY.md",
    Path.home() / "life" / "areas",
]


def search_files(query: str, max_results: int = 20) -> list[SearchResult]:
    """Fallback file-based search over SEARCH_PATHS.

    Scores each markdown file by the fraction of query terms it contains
    and returns a snippet around the first match.

    Args:
        query: Free-text query; terms of length <= 2 are ignored.
        max_results: Cap on the number of returned results.

    Returns:
        SearchResult list sorted by original_score descending; [] when the
        query has no usable terms or no search paths exist.
    """
    results = []
    terms = [t for t in query.lower().split() if len(t) > 2]
    if not terms:
        return results

    for search_path in SEARCH_PATHS:
        if not search_path.exists():
            continue

        if search_path.is_file():
            files = [search_path]
        else:
            # Newest-first by filename (dated files sort naturally);
            # capped at 100 files to bound scan time.
            files = sorted(search_path.rglob("*.md"), reverse=True)[:100]

        for fpath in files:
            try:
                content = fpath.read_text(errors="ignore")
            except (OSError, PermissionError):
                continue

            content_lower = content.lower()
            matched = sum(1 for t in terms if t in content_lower)
            if matched == 0:
                continue

            # Score based on term coverage
            score = matched / len(terms)

            # Extract best matching snippet (around first match)
            snippet = _extract_snippet(content, terms)

            results.append(SearchResult(
                text=snippet,
                source_path=str(fpath),
                original_score=score,
                metadata={"matched_terms": matched, "total_terms": len(terms)},
            ))

    # Sort by original score and limit
    results.sort(key=lambda r: r.original_score, reverse=True)
    return results[:max_results]
def search_unified(query: str, max_results: int = 20,
                   timeout: float = 10.0) -> list[SearchResult]:
    """Search via unified-memory.py; degrade to file search on any failure.

    Failures covered: script missing, non-zero exit, subprocess timeout,
    unparseable JSON output.
    """
    if not UNIFIED_MEMORY_SCRIPT.exists():
        return search_files(query, max_results)

    try:
        proc = subprocess.run(
            [PYTHON, str(UNIFIED_MEMORY_SCRIPT), "--json", query],
            capture_output=True, text=True, timeout=timeout,
        )
        if proc.returncode != 0:
            return search_files(query, max_results)

        data = json.loads(proc.stdout)
        results = []
        for item in data.get("results", [])[:max_results]:
            results.append(SearchResult(
                text=item.get("text", ""),
                source_path=item.get("metadata", {}).get("path", item.get("source", "")),
                original_score=item.get("score", 0.5),
                metadata=item.get("metadata", {}),
            ))
        return results

    except Exception:
        # Deliberate best-effort: any failure falls back to local file search
        # rather than crashing the CLI. FIX: the original caught
        # (TimeoutExpired, JSONDecodeError, Exception) — a redundant tuple,
        # since Exception already subsumes the other two.
        return search_files(query, max_results)


def enhanced_search(query: str, max_results: int = 10,
                    use_unified: bool = True) -> dict:
    """Run the full enhanced search pipeline.

    Pipeline: classify intent -> search (over-fetched) -> composite
    re-ranking with intent-adjusted weights -> trim.

    Args:
        query: Search query string.
        max_results: Maximum results to return.
        use_unified: Whether to try unified-memory.py first.

    Returns:
        Dict with intent, results, and timing info.
    """
    pipeline_start = time.perf_counter()

    # Step 1: Classify intent
    intent_result = classify(query)

    # Step 2: Search — over-fetch 2x so re-ranking has candidates to promote.
    search_start = time.perf_counter()
    if use_unified:
        raw_results = search_unified(query, max_results=max_results * 2)
    else:
        raw_results = search_files(query, max_results=max_results * 2)
    search_ms = (time.perf_counter() - search_start) * 1000

    # Step 3: Apply composite scoring with intent-adjusted weights
    scoring_start = time.perf_counter()
    scored = score_results(
        raw_results, query=query,
        weight_overrides=intent_result.weight_adjustments,
    )
    scoring_ms = (time.perf_counter() - scoring_start) * 1000

    # Step 4: Trim to max results
    final = scored[:max_results]

    pipeline_ms = (time.perf_counter() - pipeline_start) * 1000

    return {
        "query": query,
        "intent": {
            "type": intent_result.intent,
            "confidence": intent_result.confidence,
            "signals": intent_result.matched_signals,
            "classification_ms": intent_result.classification_ms,
        },
        "results": [
            {
                # Cap stored text so JSON output stays small.
                "text": r.text[:500],
                "source": r.source_path,
                "score": round(r.final_score, 4),
                "breakdown": r.score_breakdown,
            }
            for r in final
        ],
        "timing": {
            "classification_ms": round(intent_result.classification_ms, 2),
            "search_ms": round(search_ms, 2),
            "scoring_ms": round(scoring_ms, 2),
            "total_ms": round(pipeline_ms, 2),
        },
        "total_raw": len(raw_results),
        "total_returned": len(final),
    }
enhanced_search(args.query, max_results=args.top, use_unified=not args.no_unified) + + if args.json_output: + print(json.dumps(result, indent=2, ensure_ascii=False)) + else: + intent = result["intent"] + print(f"\nπŸ” Query: {result['query']}") + print(f"🎯 Intent: {intent['type']} (confidence: {intent['confidence']:.0%})") + if intent["signals"]: + print(f" Signals: {', '.join(intent['signals'])}") + print(f"⏱️ Total: {result['timing']['total_ms']:.0f}ms " + f"(classify: {result['timing']['classification_ms']:.1f}ms, " + f"search: {result['timing']['search_ms']:.0f}ms, " + f"score: {result['timing']['scoring_ms']:.1f}ms)") + print(f"πŸ“Š {result['total_returned']}/{result['total_raw']} results\n") + + for i, r in enumerate(result["results"], 1): + source = Path(r["source"]).name if r["source"] else "unknown" + print(f" {i}. [{r['score']:.3f}] {source}") + text_preview = r["text"][:120].replace("\n", " ") + print(f" {text_preview}") + print() + + +if __name__ == "__main__": + main() diff --git a/cortex/feedback_loop.py b/cortex/feedback_loop.py new file mode 100644 index 0000000..44e322e --- /dev/null +++ b/cortex/feedback_loop.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python3 +"""Feedback Loop Engine β€” automatically extract lessons from session transcripts. + +Scans OpenClaw session JSONL files, detects patterns (corrections, retries, +tool failures, knowledge gaps, self-acknowledged errors), and appends +findings to growth-log.md. 
+ +Usage: + python3 feedback_loop.py --since 24h + python3 feedback_loop.py --since 7d --dry-run + python3 feedback_loop.py --session +""" + +import argparse +import json +import os +import re +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any + +DEFAULT_CONFIG_PATH = Path(__file__).parent / "config.json" + + +def load_config(path: Path | None = None) -> dict: + p = path or DEFAULT_CONFIG_PATH + if p.exists(): + with open(p) as f: + return json.load(f) + return {} + + +def expand(p: str) -> Path: + return Path(os.path.expanduser(p)) + + +def parse_since(since: str) -> datetime: + """Parse '24h', '7d', '2h', etc. into a UTC datetime.""" + m = re.match(r"^(\d+)([hdm])$", since.strip()) + if not m: + raise ValueError(f"Invalid --since format: {since!r}. Use e.g. 24h, 7d, 30m") + val, unit = int(m.group(1)), m.group(2) + delta = {"h": timedelta(hours=val), "d": timedelta(days=val), "m": timedelta(minutes=val)}[unit] + return datetime.now(timezone.utc) - delta + + +def parse_jsonl(path: Path) -> list[dict]: + """Parse a JSONL file, skipping malformed lines.""" + entries = [] + with open(path) as f: + for i, line in enumerate(f, 1): + line = line.strip() + if not line: + continue + try: + entries.append(json.loads(line)) + except json.JSONDecodeError: + pass + return entries + + +def get_session_timestamp(entries: list[dict]) -> datetime | None: + """Extract session start timestamp.""" + for e in entries: + ts = e.get("timestamp") + if ts: + try: + return datetime.fromisoformat(ts.replace("Z", "+00:00")) + except (ValueError, TypeError): + pass + return None + + +def get_text_content(message: dict) -> str: + """Extract text from a message entry.""" + msg = message.get("message", {}) + content = msg.get("content", "") + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for item in content: + if isinstance(item, dict) and item.get("type") == "text": + 
parts.append(item.get("text", "")) + return " ".join(parts) + return "" + + +def get_tool_name(message: dict) -> str | None: + """Extract tool name from a toolCall content item.""" + msg = message.get("message", {}) + content = msg.get("content", []) + if isinstance(content, list): + for item in content: + if isinstance(item, dict) and item.get("type") == "toolCall": + return item.get("name") + return None + + +def get_tool_result_error(entry: dict) -> str | None: + """Check if a toolResult entry contains an error.""" + msg = entry.get("message", {}) + if msg.get("isError"): + content = msg.get("content", []) + if isinstance(content, list): + for item in content: + if isinstance(item, dict): + return item.get("text", "error") + return "error" + return None + + +# --- Detection Heuristics --- + +class Finding: + def __init__(self, category: str, trigger: str, lines: tuple[int, int], + session_file: str, confidence: float = 1.0): + self.category = category + self.trigger = trigger + self.lines = lines + self.session_file = session_file + self.confidence = confidence + + +def detect_corrections(entries: list[dict], config: dict, session_file: str) -> list[Finding]: + """Detect user corrections after agent actions.""" + keywords = config.get("detection", {}).get("correction_keywords", + ["nein", "falsch", "wrong", "nicht", "stop"]) + findings = [] + prev_was_assistant = False + prev_line = 0 + for i, entry in enumerate(entries): + msg = entry.get("message", {}) + role = msg.get("role") + if role == "assistant": + prev_was_assistant = True + prev_line = i + continue + if role == "user" and prev_was_assistant: + text = get_text_content(entry).lower() + for kw in keywords: + if re.search(r'\b' + re.escape(kw) + r'\b', text): + findings.append(Finding( + category="correction", + trigger=f"User said '{kw}' after agent response", + lines=(prev_line + 1, i + 1), + session_file=session_file, + confidence=0.7, + )) + break + if role == "user": + prev_was_assistant = False + 
return findings + + +def detect_retries(entries: list[dict], config: dict, session_file: str) -> list[Finding]: + """Detect same tool called N+ times in sequence.""" + threshold = config.get("detection", {}).get("retry_threshold", 3) + findings = [] + current_tool = None + count = 0 + start_line = 0 + for i, entry in enumerate(entries): + tool = get_tool_name(entry) + if tool: + if tool == current_tool: + count += 1 + else: + if current_tool and count >= threshold: + findings.append(Finding( + category="retry", + trigger=f"Tool '{current_tool}' called {count} times in sequence", + lines=(start_line + 1, i), + session_file=session_file, + )) + current_tool = tool + count = 1 + start_line = i + # Final check + if current_tool and count >= threshold: + findings.append(Finding( + category="retry", + trigger=f"Tool '{current_tool}' called {count} times in sequence", + lines=(start_line + 1, len(entries)), + session_file=session_file, + )) + return findings + + +def detect_tool_failures(entries: list[dict], config: dict, session_file: str) -> list[Finding]: + """Detect tool result errors.""" + error_patterns = config.get("detection", {}).get("error_patterns", + ["error", "Error", "failed", "Failed"]) + findings = [] + for i, entry in enumerate(entries): + if entry.get("type") == "message": + msg = entry.get("message", {}) + # Explicit isError + err = get_tool_result_error(entry) + if err: + findings.append(Finding( + category="tool_failure", + trigger=f"Tool returned error: {err[:120]}", + lines=(i + 1, i + 1), + session_file=session_file, + )) + continue + # Check tool result content for error patterns + if msg.get("role") == "toolResult": + content = msg.get("content", []) + if isinstance(content, list): + for item in content: + text = item.get("text", "") if isinstance(item, dict) else "" + for pat in error_patterns: + if pat in text: + findings.append(Finding( + category="tool_failure", + trigger=f"Tool result contains '{pat}': {text[:100]}", + lines=(i + 1, i + 1), 
+ session_file=session_file, + confidence=0.5, + )) + break + else: + continue + break + return findings + + +def detect_self_errors(entries: list[dict], config: dict, session_file: str) -> list[Finding]: + """Detect agent self-acknowledged errors.""" + keywords = config.get("detection", {}).get("apology_keywords", + ["sorry", "entschuldigung", "my mistake"]) + findings = [] + for i, entry in enumerate(entries): + msg = entry.get("message", {}) + if msg.get("role") == "assistant": + text = get_text_content(entry).lower() + for kw in keywords: + if kw.lower() in text: + findings.append(Finding( + category="self_error", + trigger=f"Agent acknowledged error with '{kw}'", + lines=(i + 1, i + 1), + session_file=session_file, + confidence=0.6, + )) + break + return findings + + +def detect_knowledge_gaps(entries: list[dict], config: dict, session_file: str) -> list[Finding]: + """Detect when user provides the answer after agent couldn't find it.""" + findings = [] + gap_phrases = ["i don't know", "ich weiß nicht", "couldn't find", "konnte nicht finden", + "unable to", "no results", "not found", "keine ergebnisse"] + for i, entry in enumerate(entries): + msg = entry.get("message", {}) + if msg.get("role") == "assistant": + text = get_text_content(entry).lower() + has_gap = any(p in text for p in gap_phrases) + if has_gap: + # Check if next user message provides info + for j in range(i + 1, min(i + 4, len(entries))): + next_msg = entries[j].get("message", {}) + if next_msg.get("role") == "user": + user_text = get_text_content(entries[j]) + if len(user_text) > 20: # Substantial answer + findings.append(Finding( + category="knowledge_gap", + trigger=f"Agent couldn't find answer, user provided it", + lines=(i + 1, j + 1), + session_file=session_file, + confidence=0.6, + )) + break + return findings + + +def analyze_session(entries: list[dict], config: dict, session_file: str) -> list[Finding]: + """Run all detectors on a session.""" + min_conf = config.get("detection", 
{}).get("min_confidence", 0.5) + findings = [] + for detector in [detect_corrections, detect_retries, detect_tool_failures, + detect_self_errors, detect_knowledge_gaps]: + findings.extend(detector(entries, config, session_file)) + return [f for f in findings if f.confidence >= min_conf] + + +def finding_to_markdown(finding: Finding, config: dict) -> str: + """Format a finding as growth-log markdown.""" + categories = config.get("categories", {}) + cat_label = categories.get(finding.category, finding.category.replace("_", " ").title()) + date = datetime.now().strftime("%Y-%m-%d") + session_name = Path(finding.session_file).stem + return f"""### {date} β€” [Auto-detected: {cat_label}] +- **Trigger:** {finding.trigger} +- **Erkenntnis:** Auto-detected pattern β€” review recommended +- **Neue FΓ€higkeit:** Awareness of recurring pattern +- **Source:** {session_name} lines {finding.lines[0]}-{finding.lines[1]} +""" + + +def deduplicate_findings(findings: list[Finding]) -> list[Finding]: + """Remove duplicate findings by category + similar trigger.""" + seen = set() + unique = [] + for f in findings: + key = (f.category, f.trigger[:60]) + if key not in seen: + seen.add(key) + unique.append(f) + return unique + + +def append_to_growth_log(findings: list[Finding], config: dict, dry_run: bool = False) -> str: + """Append findings to growth-log.md. 
Returns the text that was/would be appended.""" + if not findings: + return "" + text = "\n---\n\n" + for f in findings: + text += finding_to_markdown(f, config) + "\n" + if dry_run: + return text + log_path = expand(config.get("growth_log_path", "~/clawd/memory/growth-log.md")) + with open(log_path, "a") as fh: + fh.write(text) + return text + + +def get_recent_sessions(config: dict, since: datetime) -> list[Path]: + """Get session files modified since the given time.""" + sessions_dir = expand(config.get("sessions_dir", "~/.openclaw/agents/main/sessions")) + if not sessions_dir.exists(): + return [] + files = [] + for f in sessions_dir.glob("*.jsonl"): + mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc) + if mtime >= since: + files.append(f) + return sorted(files, key=lambda f: f.stat().st_mtime, reverse=True) + + +def run(since: str = "24h", session: str | None = None, dry_run: bool = False, + config_path: str | None = None) -> list[Finding]: + """Main entry point.""" + config = load_config(Path(config_path) if config_path else None) + if config.get("dry_run"): + dry_run = True + + if session: + session_path = Path(session) + if not session_path.exists(): + print(f"Session file not found: {session}", file=sys.stderr) + sys.exit(1) + sessions = [session_path] + else: + cutoff = parse_since(since) + sessions = get_recent_sessions(config, cutoff) + + all_findings: list[Finding] = [] + for s in sessions: + entries = parse_jsonl(s) + findings = analyze_session(entries, config, str(s)) + all_findings.extend(findings) + + all_findings = deduplicate_findings(all_findings) + output = append_to_growth_log(all_findings, config, dry_run=dry_run) + + if output: + if dry_run: + print("=== DRY RUN β€” would append: ===") + print(output) + else: + print("No patterns detected.") + + return all_findings + + +def main(): + parser = argparse.ArgumentParser(description="Feedback Loop Engine") + parser.add_argument("--since", default="24h", help="Time window (e.g. 
24h, 7d, 30m)") + parser.add_argument("--session", help="Analyze a specific session file") + parser.add_argument("--dry-run", action="store_true", help="Don't write to growth-log") + parser.add_argument("--config", help="Path to config.json") + args = parser.parse_args() + run(since=args.since, session=args.session, dry_run=args.dry_run, config_path=args.config) + + +if __name__ == "__main__": + main() diff --git a/cortex/health_scanner.py b/cortex/health_scanner.py new file mode 100644 index 0000000..454d627 --- /dev/null +++ b/cortex/health_scanner.py @@ -0,0 +1,515 @@ +#!/usr/bin/env python3 +"""Proactive Health Scanner β€” finds problems BEFORE they become alerts. + +Checks: agent health, resource trends, dependency health, configuration drift. +Designed for cron: fast (<30s), no side effects. + +Usage: + python3 health_scanner.py [--json] [--section agents|resources|deps|config] +""" + +import argparse +import hashlib +import json +import os +import re +import shutil +import subprocess +import sys +import time +import urllib.request +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any + +# --- Constants --- +OPENCLAW_CONFIG = Path.home() / ".openclaw" / "openclaw.json" +OPENCLAW_AGENTS_DIR = Path.home() / ".openclaw" / "agents" +DEPRECATED_MODELS = { + "anthropic/claude-3-5-haiku-latest": "2026-02-19", + "claude-3-5-haiku-latest": "2026-02-19", +} +DISK_HISTORY_FILE = Path.home() / ".cache" / "health-scanner" / "disk_history.json" +OLLAMA_URL = "http://localhost:11434/api/tags" + +INFO, WARN, CRITICAL = "INFO", "WARN", "CRITICAL" + + +def _severity_rank(s: str) -> int: + return {"INFO": 0, "WARN": 1, "CRITICAL": 2}.get(s, -1) + + +class Finding: + def __init__(self, section: str, severity: str, title: str, detail: str = ""): + self.section = section + self.severity = severity + self.title = title + self.detail = detail + + def to_dict(self) -> dict: + return {"section": self.section, "severity": self.severity, + "title": 
self.title, "detail": self.detail} + + +class HealthScanner: + def __init__(self): + self.findings: list[Finding] = [] + self.config: dict = {} + self._load_config() + + def _load_config(self): + try: + self.config = json.loads(OPENCLAW_CONFIG.read_text()) + except Exception: + self.findings.append(Finding("config", CRITICAL, "Cannot read OpenClaw config", + str(OPENCLAW_CONFIG))) + + def _add(self, section: str, severity: str, title: str, detail: str = ""): + self.findings.append(Finding(section, severity, title, detail)) + + # --- A) Agent Health --- + def check_agents(self): + agents = self.config.get("agents", {}).get("list", []) + if not agents: + self._add("agents", WARN, "No agents found in config") + return + + for agent in agents: + aid = agent.get("id", "unknown") + # Workspace check + ws = agent.get("workspace", "") + if ws: + wp = Path(ws) + if not wp.exists(): + self._add("agents", CRITICAL, f"Agent '{aid}' workspace missing", ws) + elif not os.access(wp, os.W_OK): + self._add("agents", WARN, f"Agent '{aid}' workspace not writable", ws) + else: + self._add("agents", INFO, f"Agent '{aid}' workspace OK") + else: + self._add("agents", WARN, f"Agent '{aid}' has no workspace configured") + + # Session activity check + session_dir = OPENCLAW_AGENTS_DIR / aid / "sessions" + if session_dir.exists(): + sessions = list(session_dir.iterdir()) + if sessions: + latest_mtime = max(f.stat().st_mtime for f in sessions if f.is_file()) + age_days = (time.time() - latest_mtime) / 86400 + if age_days > 7: + self._add("agents", WARN, + f"Agent '{aid}' inactive for {age_days:.0f} days", + f"Last session activity: {datetime.fromtimestamp(latest_mtime).isoformat()}") + else: + self._add("agents", INFO, + f"Agent '{aid}' last active {age_days:.1f} days ago") + else: + self._add("agents", INFO, f"Agent '{aid}' has no session files") + else: + self._add("agents", INFO, f"Agent '{aid}' session dir not found") + + # Model reachability + model = agent.get("model", 
{}).get("primary", "") + if model.startswith("ollama"): + self._check_ollama_model(aid, model) + elif model: + self._add("agents", INFO, f"Agent '{aid}' uses cloud model: {model}") + + def _check_ollama_model(self, aid: str, model: str): + """Check if ollama model is reachable.""" + try: + req = urllib.request.Request(OLLAMA_URL, method="GET") + with urllib.request.urlopen(req, timeout=5) as resp: + data = json.loads(resp.read()) + model_name = model.split("/", 1)[-1] if "/" in model else model + available = [m.get("name", "") for m in data.get("models", [])] + if any(model_name in m for m in available): + self._add("agents", INFO, f"Agent '{aid}' ollama model available: {model_name}") + else: + self._add("agents", WARN, + f"Agent '{aid}' ollama model not found: {model_name}", + f"Available: {', '.join(available[:10])}") + except Exception as e: + self._add("agents", WARN, f"Agent '{aid}' ollama unreachable for model check", + str(e)) + + # --- B) Resource Trends --- + def check_resources(self): + # Disk usage + self._check_disk() + # Memory + self._check_memory() + # Session file accumulation + self._check_session_files() + # SQLite DB sizes + self._check_db_sizes() + # Log file sizes + self._check_log_sizes() + + def _check_disk(self): + usage = shutil.disk_usage("/") + pct = usage.used / usage.total * 100 + free_gb = usage.free / (1024 ** 3) + if pct > 95: + self._add("resources", CRITICAL, f"Disk {pct:.1f}% full ({free_gb:.1f}GB free)") + elif pct > 85: + self._add("resources", WARN, f"Disk {pct:.1f}% full ({free_gb:.1f}GB free)") + else: + self._add("resources", INFO, f"Disk {pct:.1f}% used ({free_gb:.1f}GB free)") + + # Trend tracking + self._track_disk_trend(usage.used) + + def _track_disk_trend(self, current_bytes: int): + try: + DISK_HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True) + history = [] + if DISK_HISTORY_FILE.exists(): + history = json.loads(DISK_HISTORY_FILE.read_text()) + now = time.time() + history.append({"ts": now, "used": 
current_bytes}) + # Keep last 7 days + cutoff = now - 7 * 86400 + history = [h for h in history if h["ts"] > cutoff] + DISK_HISTORY_FILE.write_text(json.dumps(history)) + + if len(history) >= 2: + oldest, newest = history[0], history[-1] + dt = newest["ts"] - oldest["ts"] + if dt > 3600: # at least 1 hour of data + growth_per_day = (newest["used"] - oldest["used"]) / dt * 86400 + gb_per_day = growth_per_day / (1024 ** 3) + if gb_per_day > 1: + total = shutil.disk_usage("/").total + remaining = total - current_bytes + days_left = remaining / growth_per_day if growth_per_day > 0 else 999 + self._add("resources", WARN, + f"Disk growing {gb_per_day:.1f}GB/day, ~{days_left:.0f} days until full") + except Exception: + pass + + def _check_memory(self): + try: + with open("/proc/meminfo") as f: + info = {} + for line in f: + parts = line.split(":") + if len(parts) == 2: + key = parts[0].strip() + val = int(parts[1].strip().split()[0]) # kB + info[key] = val + total = info.get("MemTotal", 1) + available = info.get("MemAvailable", total) + used_pct = (1 - available / total) * 100 + if used_pct > 90: + self._add("resources", CRITICAL, f"Memory {used_pct:.0f}% used") + elif used_pct > 80: + self._add("resources", WARN, f"Memory {used_pct:.0f}% used") + else: + self._add("resources", INFO, f"Memory {used_pct:.0f}% used") + except Exception as e: + self._add("resources", WARN, "Cannot read memory info", str(e)) + + def _check_session_files(self): + if not OPENCLAW_AGENTS_DIR.exists(): + return + for agent_dir in OPENCLAW_AGENTS_DIR.iterdir(): + sessions = agent_dir / "sessions" + if sessions.exists(): + count = sum(1 for _ in sessions.iterdir()) + if count > 1000: + self._add("resources", WARN, + f"Agent '{agent_dir.name}' has {count} session files") + elif count > 500: + self._add("resources", INFO, + f"Agent '{agent_dir.name}' has {count} session files") + + def _check_db_sizes(self): + """Check for large SQLite/DB files.""" + search_paths = [ + Path.home() / ".openclaw", + 
Path.home() / "clawd", + ] + for base in search_paths: + if not base.exists(): + continue + try: + for db_file in base.rglob("*.db"): + size_mb = db_file.stat().st_size / (1024 ** 2) + if size_mb > 500: + self._add("resources", WARN, + f"Large DB: {db_file} ({size_mb:.0f}MB)") + for db_file in base.rglob("*.sqlite"): + size_mb = db_file.stat().st_size / (1024 ** 2) + if size_mb > 500: + self._add("resources", WARN, + f"Large DB: {db_file} ({size_mb:.0f}MB)") + except PermissionError: + pass + + def _check_log_sizes(self): + log_dirs = [ + Path.home() / ".openclaw" / "logs", + Path.home() / "clawd" / "logs", + Path("/tmp"), + ] + for d in log_dirs: + if not d.exists(): + continue + try: + for f in d.iterdir(): + if f.suffix in (".log", ".txt") and f.is_file(): + size_mb = f.stat().st_size / (1024 ** 2) + if size_mb > 100: + self._add("resources", WARN, + f"Large log: {f} ({size_mb:.0f}MB)") + except PermissionError: + pass + + # --- C) Dependency Health --- + def check_deps(self): + self._check_nats() + self._check_typedb() + self._check_chromadb() + self._check_ollama() + self._check_key_expiry() + + def _run_cmd(self, cmd: list[str], timeout: int = 5) -> tuple[int, str]: + try: + r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) + return r.returncode, r.stdout.strip() + except subprocess.TimeoutExpired: + return -1, "timeout" + except FileNotFoundError: + return -2, "not found" + except Exception as e: + return -3, str(e) + + def _check_docker_container(self, name: str) -> bool: + """Check if a service is running as a Docker container.""" + rc, out = self._run_cmd( + ["sg", "docker", "-c", f"docker ps --filter name={name} --format '{{{{.Names}}}} {{{{.Status}}}}'"], + timeout=10 + ) + if rc == 0 and out.strip(): + return True + return False + + def _check_nats(self): + rc, out = self._run_cmd(["systemctl", "--user", "is-active", "nats-server"]) + if rc == 0 and out == "active": + self._add("deps", INFO, "NATS server active") + elif 
self._check_docker_container("nats"): + self._add("deps", INFO, "NATS server active (Docker)") + else: + self._add("deps", CRITICAL, "NATS server not active", out) + + def _check_typedb(self): + # Check for typedb process + rc, out = self._run_cmd(["pgrep", "-f", "typedb"]) + if rc == 0: + self._add("deps", INFO, "TypeDB running") + elif self._check_docker_container("typedb"): + self._add("deps", INFO, "TypeDB running (Docker)") + else: + rc2, out2 = self._run_cmd(["systemctl", "--user", "is-active", "typedb"]) + if rc2 == 0 and out2 == "active": + self._add("deps", INFO, "TypeDB service active") + else: + self._add("deps", WARN, "TypeDB not detected", "May not be running") + + def _check_chromadb(self): + rag_dirs = list(Path.home().glob("**/.rag-db"))[:3] + clawd_rag = Path.home() / "clawd" / ".rag-db" + if clawd_rag.exists(): + age_hours = (time.time() - clawd_rag.stat().st_mtime) / 3600 + if age_hours > 48: + self._add("deps", WARN, + f"ChromaDB (.rag-db) last modified {age_hours:.0f}h ago") + else: + self._add("deps", INFO, f"ChromaDB (.rag-db) fresh ({age_hours:.0f}h)") + else: + self._add("deps", INFO, "No .rag-db found in clawd workspace") + + def _check_ollama(self): + try: + req = urllib.request.Request(OLLAMA_URL, method="GET") + with urllib.request.urlopen(req, timeout=5) as resp: + data = json.loads(resp.read()) + count = len(data.get("models", [])) + self._add("deps", INFO, f"Ollama reachable ({count} models)") + except Exception as e: + self._add("deps", WARN, "Ollama not reachable", str(e)) + + def _check_key_expiry(self): + """Scan env files for date-like patterns that might indicate key expiry.""" + env_files = list(Path.home().glob(".config/**/*.env")) + env_files += list(Path.home().glob("clawd/**/.env")) + env_files += [Path.home() / ".env"] + now = datetime.now() + date_pattern = re.compile(r'(\d{4}-\d{2}-\d{2})') + for ef in env_files: + if not ef.exists() or not ef.is_file(): + continue + try: + content = ef.read_text() + for match in 
date_pattern.finditer(content): + try: + d = datetime.fromisoformat(match.group(1)) + if now < d < now + timedelta(days=30): + self._add("deps", WARN, + f"Possible expiry date {match.group(1)} in {ef.name}", + str(ef)) + except ValueError: + pass + except Exception: + pass + + # --- D) Configuration Drift --- + def check_config(self): + self._check_deprecated_models() + self._check_config_hash() + + def _check_deprecated_models(self): + agents = self.config.get("agents", {}).get("list", []) + defaults = self.config.get("agents", {}).get("defaults", {}) + now = datetime.now() + + all_models = set() + # Collect from defaults + dm = defaults.get("model", {}) + if dm.get("primary"): + all_models.add(dm["primary"]) + for fb in dm.get("fallbacks", []): + all_models.add(fb) + for m in defaults.get("models", {}): + all_models.add(m) + # Collect from agents + for agent in agents: + am = agent.get("model", {}) + if am.get("primary"): + all_models.add(am["primary"]) + for fb in am.get("fallbacks", []): + all_models.add(fb) + hb = agent.get("heartbeat", {}) + if hb.get("model"): + all_models.add(hb["model"]) + + # Also check defaults heartbeat + dhb = defaults.get("heartbeat", {}) + if dhb.get("model"): + all_models.add(dhb["model"]) + + for model in all_models: + for dep_model, eol_date in DEPRECATED_MODELS.items(): + if dep_model in model: + eol = datetime.fromisoformat(eol_date) + days_left = (eol - now).days + if days_left < 0: + self._add("config", CRITICAL, + f"Model '{model}' is past EOL ({eol_date})") + elif days_left < 14: + self._add("config", WARN, + f"Model '{model}' EOL in {days_left} days ({eol_date})") + else: + self._add("config", INFO, + f"Model '{model}' EOL on {eol_date} ({days_left} days)") + + def _check_config_hash(self): + """Check if config file has changed since last scan.""" + hash_file = Path.home() / ".cache" / "health-scanner" / "config_hash.txt" + hash_file.parent.mkdir(parents=True, exist_ok=True) + try: + current_hash = 
hashlib.sha256(OPENCLAW_CONFIG.read_bytes()).hexdigest()[:16] + if hash_file.exists(): + prev_hash = hash_file.read_text().strip() + if prev_hash != current_hash: + self._add("config", INFO, "Config file changed since last scan", + f"Old: {prev_hash}, New: {current_hash}") + hash_file.write_text(current_hash) + except Exception as e: + self._add("config", WARN, "Cannot track config hash", str(e)) + + # --- Run --- + def run(self, sections: list[str] | None = None) -> dict: + section_map = { + "agents": self.check_agents, + "resources": self.check_resources, + "deps": self.check_deps, + "config": self.check_config, + } + targets = sections if sections else list(section_map.keys()) + for s in targets: + if s in section_map: + try: + section_map[s]() + except Exception as e: + self._add(s, CRITICAL, f"Section '{s}' check failed", str(e)) + + # Build report + worst = INFO + for f in self.findings: + if _severity_rank(f.severity) > _severity_rank(worst): + worst = f.severity + + return { + "timestamp": datetime.now().isoformat(), + "overall": worst, + "findings_count": { + INFO: sum(1 for f in self.findings if f.severity == INFO), + WARN: sum(1 for f in self.findings if f.severity == WARN), + CRITICAL: sum(1 for f in self.findings if f.severity == CRITICAL), + }, + "findings": [f.to_dict() for f in self.findings], + } + + +def format_human(report: dict) -> str: + lines = [] + overall = report["overall"] + icon = {"INFO": "βœ…", "WARN": "⚠️", "CRITICAL": "🚨"}.get(overall, "❓") + lines.append(f"{icon} Health Report β€” {overall}") + lines.append(f" {report['findings_count']}") + lines.append(f" {report['timestamp']}") + lines.append("") + + for sev in [CRITICAL, WARN, INFO]: + items = [f for f in report["findings"] if f["severity"] == sev] + if not items: + continue + icon = {"CRITICAL": "🚨", "WARN": "⚠️", "INFO": "ℹ️"}[sev] + lines.append(f"--- {sev} ({len(items)}) ---") + for f in items: + lines.append(f" {icon} [{f['section']}] {f['title']}") + if f["detail"]: + 
lines.append(f" {f['detail']}") + lines.append("") + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Proactive Health Scanner") + parser.add_argument("--json", action="store_true", help="Output JSON") + parser.add_argument("--section", type=str, help="Comma-separated sections: agents,resources,deps,config") + args = parser.parse_args() + + sections = args.section.split(",") if args.section else None + scanner = HealthScanner() + report = scanner.run(sections) + + if args.json: + print(json.dumps(report, indent=2)) + else: + print(format_human(report)) + + # Exit code: 2 for CRITICAL, 1 for WARN, 0 for INFO + if report["overall"] == CRITICAL: + sys.exit(2) + elif report["overall"] == WARN: + sys.exit(1) + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/cortex/intent_classifier.py b/cortex/intent_classifier.py new file mode 100755 index 0000000..356621f --- /dev/null +++ b/cortex/intent_classifier.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +""" +Intent Classifier β€” Classify memory queries into WHO/WHEN/WHY/WHAT intents +using regex-based heuristics. No LLM call, <5ms per query. + +Output: intent type + suggested weight adjustments for composite scorer. 
+""" + +import json +import re +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +CONFIG_PATH = Path(__file__).parent / "config.json" + + +@dataclass +class IntentResult: + """Classification result for a query.""" + intent: str # WHO, WHEN, WHY, WHAT + confidence: float # 0.0 to 1.0 + weight_adjustments: dict = field(default_factory=dict) + matched_signals: list = field(default_factory=list) + classification_ms: float = 0.0 + + +# Compiled patterns cache +_compiled_patterns: dict[str, list[re.Pattern]] = {} +_config_cache: Optional[dict] = None + + +def _load_config(path: Optional[Path] = None) -> dict: + """Load intent classification config.""" + global _config_cache + if _config_cache is not None: + return _config_cache + p = path or CONFIG_PATH + if p.exists(): + with open(p) as f: + _config_cache = json.load(f).get("intent_classification", {}) + return _config_cache + _config_cache = {} + return _config_cache + + +def _get_patterns(intent: str, config: dict) -> list[re.Pattern]: + """Get compiled regex patterns for an intent (cached).""" + if intent not in _compiled_patterns: + raw = config.get(intent, {}).get("patterns", []) + _compiled_patterns[intent] = [re.compile(p, re.IGNORECASE) for p in raw] + return _compiled_patterns[intent] + + +def classify(query: str, config: Optional[dict] = None) -> IntentResult: + """Classify a query into WHO/WHEN/WHY/WHAT intent. + + Uses keyword matching and regex patterns. Designed for <5ms execution. + + Args: + query: The search query string. + config: Optional config dict. Loaded from config.json if None. + + Returns: + IntentResult with intent type, confidence, and weight adjustments. 
+ """ + start = time.perf_counter() + cfg = config or _load_config() + + if not query or not query.strip(): + elapsed = (time.perf_counter() - start) * 1000 + return IntentResult(intent="WHAT", confidence=0.1, classification_ms=elapsed, + matched_signals=["empty_query"]) + + query_lower = query.lower().strip() + scores: dict[str, float] = {"WHO": 0.0, "WHEN": 0.0, "WHY": 0.0, "WHAT": 0.0} + signals: dict[str, list[str]] = {"WHO": [], "WHEN": [], "WHY": [], "WHAT": []} + + for intent in ("WHO", "WHEN", "WHY", "WHAT"): + intent_cfg = cfg.get(intent, {}) + + # Keyword matching (fast) + keywords = intent_cfg.get("keywords", []) + for kw in keywords: + if kw.lower() in query_lower: + scores[intent] += 1.0 + signals[intent].append(f"kw:{kw}") + + # Regex pattern matching + for pattern in _get_patterns(intent, cfg): + if pattern.search(query_lower): + scores[intent] += 2.0 # patterns are more specific + signals[intent].append(f"re:{pattern.pattern[:30]}") + + # Additional heuristics + # Names (capitalized words not at start) suggest WHO β€” but exclude known non-person terms + # German nouns are capitalized β€” so caps heuristic needs extra guard: + # Only count as person-name if the word is NOT a common German/tech noun + # and there's additional WHO signal (keyword/pattern already scored). 
+ _NON_PERSON_CAPS = { + "sparkasse", "taskboard", "uptime", "kuma", "forgejo", "traefik", "nginx", + "docker", "chromadb", "typedb", "nats", "kafka", "pinecone", "odoo", + "mondo", "gate", "vainplex", "openclaw", "telegram", "discord", "matrix", + "opus", "sonnet", "haiku", "claude", "gemini", "ollama", "mona", "vera", + "stella", "viola", "hetzner", "proxmox", "debian", "linux", "python", + "api", "cli", "dns", "ssl", "tls", "ssh", "http", "https", "sepa", + "bafin", "iso", "iban", "postgres", "sqlite", "redis", "github", + # Common German capitalized nouns that aren't people + "aufgabe", "zugang", "status", "server", "konto", "liste", "daten", + "problem", "fehler", "lΓΆsung", "version", "projekt", "system", "email", + "rechnung", "zahlung", "vertrag", "termin", "meeting", "deploy", + "update", "config", "setup", "deployment", "monitoring", "backup", + "migration", "integration", "infrastruktur", "netzwerk", "sicherheit", + } + words = query.split() + if len(words) >= 2: + # Check ALL words for name-like capitalization (including first word) + caps = [w for w in words if w[0].isupper() and len(w) > 2 + and not w.isupper() and w.lower() not in _NON_PERSON_CAPS] + if len(caps) >= 2: + # Two+ unknown capitalized words strongly suggest names (e.g. 
"Sebastian Baier") + scores["WHO"] += 0.8 * len(caps) + signals["WHO"].append(f"multi_caps:{','.join(caps[:3])}") + elif caps: + # Single unknown cap word β€” weak signal in German + scores["WHO"] += 0.3 + signals["WHO"].append(f"caps:{caps[0]}") + + # Date-like tokens suggest WHEN + if re.search(r'\b\d{4}[-/]\d{2}', query) or re.search(r'\b(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|januar|februar|mΓ€rz)\b', query_lower): + scores["WHEN"] += 1.5 + signals["WHEN"].append("date_token") + + # Question words at start + if query_lower.startswith(("warum ", "why ", "wieso ", "weshalb ")): + scores["WHY"] += 3.0 + signals["WHY"].append("start_why") + elif query_lower.startswith(("wer ", "who ")): + scores["WHO"] += 3.0 + signals["WHO"].append("start_who") + elif query_lower.startswith(("wann ", "when ")): + scores["WHEN"] += 3.0 + signals["WHEN"].append("start_when") + + # Pick winner + best_intent = max(scores, key=scores.get) + total = sum(scores.values()) + confidence = scores[best_intent] / total if total > 0 else 0.25 + + # If no strong signal, default to WHAT + if scores[best_intent] < 0.5: + best_intent = "WHAT" + confidence = 0.3 + + # Get weight adjustments + adjustments = cfg.get(best_intent, {}).get("weight_adjustments", {}) + + elapsed = (time.perf_counter() - start) * 1000 + return IntentResult( + intent=best_intent, + confidence=round(confidence, 3), + weight_adjustments=adjustments, + matched_signals=signals[best_intent], + classification_ms=round(elapsed, 3), + ) + + +def reset_cache(): + """Reset config and pattern caches (for testing).""" + global _config_cache, _compiled_patterns + _config_cache = None + _compiled_patterns = {} + + +if __name__ == "__main__": + import sys + queries = sys.argv[1:] or [ + "Albert Hild contact", + "when did we fix the gateway", + "why did we choose NATS over Kafka", + "Mondo Gate regulatory status", + "wer ist Sebastian Baier", + "wann wurde TypeDB eingerichtet", + "warum ChromaDB statt Pinecone", + ] + for q in 
queries: + r = classify(q) + print(f" [{r.intent}] ({r.confidence:.2f}, {r.classification_ms:.2f}ms) {q}") + if r.matched_signals: + print(f" signals: {r.matched_signals}") diff --git a/cortex/memory_hygiene.py b/cortex/memory_hygiene.py new file mode 100644 index 0000000..2480335 --- /dev/null +++ b/cortex/memory_hygiene.py @@ -0,0 +1,453 @@ +#!/usr/bin/env python3 +"""Memory Hygiene Tools β€” find duplicates, stale content, orphans, stats, archive.""" + +import argparse +import hashlib +import json +import os +import re +import shutil +import sys +from collections import defaultdict +from datetime import datetime, timedelta +from pathlib import Path + +MEMORY_DIR = Path.home() / "clawd" / "memory" +ARCHIVE_DIR = MEMORY_DIR / "archive" +CONFIG_PATH = Path(__file__).parent / "config.json" + +PERMANENT_FILES = { + "MEMORY.md", "WORKING.md", "growth-log.md", "BOOT_CONTEXT.md", + "README.md", "active-context.json", "network-map.md", + "learned-context.md", "email-contacts.json", +} + +DAILY_NOTE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}(?:-.+)?\.md$") +DATE_RE = re.compile(r"\b(20\d{2})-(\d{2})-(\d{2})\b") +TODO_RE = re.compile(r"(?:TODO|FIXME|HACK|XXX)\b", re.IGNORECASE) +IN_PROGRESS_RE = re.compile(r"status:\s*in.?progress", re.IGNORECASE) +LINK_RE = re.compile(r"\[([^\]]*)\]\(([^)]+)\)") +EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+") +PHONE_RE = re.compile(r"(?:\+\d[\d\s\-]{7,}|\(\d+\)\s*[\d\s\-]{5,})") + + +def load_config(): + if CONFIG_PATH.exists(): + return json.loads(CONFIG_PATH.read_text()) + return {} + + +def get_md_files(base: Path | None = None, recursive: bool = True) -> list[Path]: + if base is None: + base = MEMORY_DIR + if recursive: + return sorted(base.rglob("*.md")) + return sorted(base.glob("*.md")) + + +def _normalize(text: str) -> str: + return re.sub(r"\s+", " ", text.lower().strip()) + + +def _para_hash(para: str) -> str: + return hashlib.md5(_normalize(para).encode()).hexdigest() + + +# --- Duplicates --- + +def find_duplicates(min_length: 
int = 50, threshold: float = 0.8) -> list[dict]: + """Find near-duplicate paragraphs across memory files.""" + # Collect paragraphs with their fingerprints + para_index: dict[str, list[dict]] = defaultdict(list) + + for fp in get_md_files(): + try: + text = fp.read_text(errors="replace") + except Exception: + continue + paragraphs = re.split(r"\n\s*\n", text) + line = 1 + for para in paragraphs: + stripped = para.strip() + if len(stripped) < min_length: + line += para.count("\n") + 1 + continue + h = _para_hash(stripped) + para_index[h].append({ + "file": str(fp.relative_to(MEMORY_DIR)), + "line": line, + "preview": stripped[:100], + }) + line += para.count("\n") + 1 + + dupes = [] + for h, locations in para_index.items(): + if len(locations) > 1: + # Deduplicate by file (same file same hash = skip) + seen = set() + unique = [] + for loc in locations: + key = (loc["file"], loc["line"]) + if key not in seen: + seen.add(key) + unique.append(loc) + if len(unique) > 1: + dupes.append({"hash": h, "locations": unique}) + + return dupes + + +# --- Staleness --- + +def find_stale(now: datetime | None = None) -> list[dict]: + """Find potentially stale content.""" + if now is None: + now = datetime.now() + cfg = load_config() + stale_cfg = cfg.get("staleness", {}) + date_days = stale_cfg.get("date_days", 90) + todo_days = stale_cfg.get("todo_days", 30) + progress_days = stale_cfg.get("in_progress_days", 14) + contact_days = stale_cfg.get("contact_days", 180) + + results = [] + + for fp in get_md_files(): + try: + text = fp.read_text(errors="replace") + mtime = datetime.fromtimestamp(fp.stat().st_mtime) + except Exception: + continue + + rel = str(fp.relative_to(MEMORY_DIR)) + lines = text.split("\n") + file_age = (now - mtime).days + + for i, line in enumerate(lines, 1): + # Old dates in non-historical context + for m in DATE_RE.finditer(line): + try: + d = datetime(int(m.group(1)), int(m.group(2)), int(m.group(3))) + age = (now - d).days + if age > date_days and "history" 
not in rel.lower() and "archive" not in rel.lower(): + results.append({ + "file": rel, "line": i, + "reason": f"Date {m.group(0)} is {age} days old", + "severity": "info", + }) + except ValueError: + pass + + # Old TODOs + if TODO_RE.search(line) and file_age > todo_days: + results.append({ + "file": rel, "line": i, + "reason": f"TODO in file not modified for {file_age} days", + "severity": "warning", + }) + + # Stale in_progress + if IN_PROGRESS_RE.search(line) and file_age > progress_days: + results.append({ + "file": rel, "line": i, + "reason": f"in_progress status, file not updated for {file_age} days", + "severity": "warning", + }) + + # Contact info staleness + if (EMAIL_RE.search(line) or PHONE_RE.search(line)) and file_age > contact_days: + results.append({ + "file": rel, "line": i, + "reason": f"Contact info in file not updated for {file_age} days", + "severity": "info", + }) + + return results + + +# --- Orphans --- + +def find_orphans() -> dict: + """Find orphaned files and broken links.""" + all_files = set() + for fp in MEMORY_DIR.rglob("*"): + if fp.is_file(): + all_files.add(str(fp.relative_to(MEMORY_DIR))) + + # Collect all references + referenced = set() + broken_links = [] + + for fp in get_md_files(): + try: + text = fp.read_text(errors="replace") + except Exception: + continue + rel = str(fp.relative_to(MEMORY_DIR)) + + for m in LINK_RE.finditer(text): + target = m.group(2) + if target.startswith("http://") or target.startswith("https://"): + continue + # Resolve relative to file's directory + target_clean = target.split("#")[0].split("?")[0] + if not target_clean: + continue + resolved = (fp.parent / target_clean).resolve() + try: + ref_rel = str(resolved.relative_to(MEMORY_DIR)) + referenced.add(ref_rel) + except ValueError: + pass + if not resolved.exists(): + broken_links.append({ + "file": rel, + "link_text": m.group(1), + "target": target, + }) + + # Orphaned files (never referenced, not permanent) + orphaned = [] + for f in 
# --- Stats ---

def gather_stats() -> dict:
    """Collect size, age and word-count statistics for the memory directory."""
    now = datetime.now()
    regular_files = [p for p in MEMORY_DIR.rglob("*") if p.is_file()]

    ext_counts: dict[str, int] = defaultdict(int)
    mtimes: list[tuple[str, datetime]] = []
    sizes: list[tuple[str, int]] = []
    word_counts: list[tuple[str, int]] = []
    total_size = 0
    changed_24h = changed_7d = changed_30d = 0

    for p in regular_files:
        rel = str(p.relative_to(MEMORY_DIR))
        info = p.stat()
        total_size += info.st_size
        ext_counts[p.suffix or "(none)"] += 1
        modified = datetime.fromtimestamp(info.st_mtime)
        mtimes.append((rel, modified))
        sizes.append((rel, info.st_size))
        # Recency buckets are cumulative: a file changed today counts in all three.
        age_days = (now - modified).days
        changed_24h += age_days < 1
        changed_7d += age_days < 7
        changed_30d += age_days < 30
        if p.suffix == ".md":
            try:
                word_counts.append((rel, len(p.read_text(errors="replace").split())))
            except Exception:
                pass  # unreadable markdown: skip its word count

    mtimes.sort(key=lambda pair: pair[1])
    sizes.sort(key=lambda pair: pair[1], reverse=True)

    return {
        "total_files": len(regular_files),
        "total_size_bytes": total_size,
        "total_size_human": f"{total_size / 1024:.1f} KB",
        "files_by_extension": dict(sorted(ext_counts.items())),
        "oldest": {"file": mtimes[0][0], "date": mtimes[0][1].isoformat()} if mtimes else None,
        "newest": {"file": mtimes[-1][0], "date": mtimes[-1][1].isoformat()} if mtimes else None,
        "largest_files": [{"file": name, "bytes": nbytes} for name, nbytes in sizes[:10]],
        "changed_24h": changed_24h,
        "changed_7d": changed_7d,
        "changed_30d": changed_30d,
        "word_count_top10": sorted(word_counts, key=lambda pair: pair[1], reverse=True)[:10],
    }


# --- Archive ---

def archive_old_notes(older_than_days: int = 90, execute: bool = False, now: datetime | None = None) -> dict:
    """Plan (and with execute=True, perform) archival of old daily notes.

    Daily notes older than the cutoff move to archive/<year>/ and a JSON
    manifest is written; by default this is a dry run that only reports.
    """
    if now is None:
        now = datetime.now()
    cutoff = now - timedelta(days=older_than_days)

    candidates = []
    for note in MEMORY_DIR.glob("*.md"):
        if note.name in PERMANENT_FILES or not DAILY_NOTE_RE.match(note.name):
            continue
        # The filename itself carries the note's date.
        date_match = DATE_RE.match(note.name)
        if date_match is None:
            continue
        try:
            note_date = datetime(int(date_match.group(1)),
                                 int(date_match.group(2)),
                                 int(date_match.group(3)))
        except ValueError:
            continue
        if note_date >= cutoff:
            continue
        year_dir = ARCHIVE_DIR / date_match.group(1)
        candidates.append({
            "source": str(note.relative_to(MEMORY_DIR)),
            "dest": str(year_dir.relative_to(MEMORY_DIR) / note.name),
            "date": note_date.isoformat(),
        })

    manifest = {
        "archived_at": now.isoformat(),
        "older_than_days": older_than_days,
        "dry_run": not execute,
        "files": candidates,
        "count": len(candidates),
    }

    if execute and candidates:
        for entry in candidates:
            destination = MEMORY_DIR / entry["dest"]
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(MEMORY_DIR / entry["source"]), str(destination))
        # The manifest on disk records the move; manifest_path is added only
        # to the returned dict afterwards.
        manifest_path = ARCHIVE_DIR / f"manifest-{now.strftime('%Y%m%d-%H%M%S')}.json"
        manifest_path.parent.mkdir(parents=True, exist_ok=True)
        manifest_path.write_text(json.dumps(manifest, indent=2))
        manifest["manifest_path"] = str(manifest_path.relative_to(MEMORY_DIR))

    return manifest
# --- Report ---

def generate_report() -> tuple[str, bool]:
    """Generate combined markdown report.

    Returns (report_text, has_critical).  has_critical becomes True when
    stale warnings or broken links exist, so the CLI can exit non-zero.
    """
    lines = ["# Memory Hygiene Report", f"Generated: {datetime.now().isoformat()}", ""]
    has_critical = False

    # Stats
    stats = gather_stats()
    lines.append("## Stats")
    lines.append(f"- **Files:** {stats['total_files']} ({stats['total_size_human']})")
    lines.append(f"- **Changed 24h/7d/30d:** {stats['changed_24h']}/{stats['changed_7d']}/{stats['changed_30d']}")
    lines.append("")

    # Duplicates (show at most 10 groups)
    dupes = find_duplicates()
    lines.append(f"## Duplicates ({len(dupes)} found)")
    for d in dupes[:10]:
        locs = ", ".join(f"`{l['file']}:{l['line']}`" for l in d["locations"])
        lines.append(f"- {locs}: {d['locations'][0]['preview'][:60]}...")
    lines.append("")

    # Staleness β€” any warning flips the critical flag
    stale = find_stale()
    warnings = [s for s in stale if s["severity"] == "warning"]
    lines.append(f"## Stale Items ({len(stale)} total, {len(warnings)} warnings)")
    if warnings:
        has_critical = True
    for s in stale[:20]:
        icon = "⚠️" if s["severity"] == "warning" else "ℹ️"
        lines.append(f"- {icon} `{s['file']}:{s['line']}` β€” {s['reason']}")
    lines.append("")

    # Orphans / broken links
    orph = find_orphans()
    bl = orph["broken_links"]
    lines.append("## Orphans")
    lines.append(f"- **Orphaned files:** {len(orph['orphaned_files'])}")
    lines.append(f"- **Broken links:** {len(bl)}")
    lines.append(f"- **Empty files:** {len(orph['empty_files'])}")
    if bl:
        has_critical = True
        for b in bl[:10]:
            lines.append(f"  - `{b['file']}` β†’ `{b['target']}` (broken)")
    lines.append("")

    # Archive candidates (always a dry run inside the report)
    archive = archive_old_notes(older_than_days=90, execute=False)
    lines.append(f"## Archive Candidates ({archive['count']} files older than 90 days)")
    for f in archive["files"][:10]:
        lines.append(f"- `{f['source']}` β†’ `{f['dest']}`")
    lines.append("")

    return "\n".join(lines), has_critical


def _parse_days(value: str) -> int:
    """Parse an age threshold like '90' or '90d' into days.

    Exits with a readable message instead of the ValueError traceback the
    previous ``int(value.rstrip("d"))`` produced on input such as '3w'.
    """
    m = re.fullmatch(r"(\d+)\s*d?", value.strip())
    if not m:
        print(f"Invalid --older-than value: {value!r} (expected e.g. 90d)", file=sys.stderr)
        sys.exit(2)
    return int(m.group(1))


def main():
    """CLI entry point dispatching the hygiene subcommands."""
    parser = argparse.ArgumentParser(description="Memory Hygiene Tools")
    sub = parser.add_subparsers(dest="command")

    sub.add_parser("dupes", help="Find duplicate content")
    sub.add_parser("stale", help="Find stale content")
    sub.add_parser("orphans", help="Find orphaned files and broken links")
    sub.add_parser("stats", help="Memory statistics")

    arc = sub.add_parser("archive", help="Archive old daily notes")
    arc.add_argument("--older-than", default="90d", help="Age threshold (e.g., 90d)")
    arc.add_argument("--execute", action="store_true", help="Actually move files (default: dry-run)")

    sub.add_parser("report", help="Full hygiene report")

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    # JSON payloads go to stdout; human summaries go to stderr so output
    # stays pipeable.
    if args.command == "dupes":
        dupes = find_duplicates()
        print(json.dumps(dupes, indent=2, ensure_ascii=False))
        print(f"\n{len(dupes)} duplicate groups found.", file=sys.stderr)

    elif args.command == "stale":
        stale = find_stale()
        print(json.dumps(stale, indent=2, ensure_ascii=False))
        print(f"\n{len(stale)} stale items found.", file=sys.stderr)

    elif args.command == "orphans":
        orph = find_orphans()
        print(json.dumps(orph, indent=2, ensure_ascii=False))

    elif args.command == "stats":
        stats = gather_stats()
        print(json.dumps(stats, indent=2, ensure_ascii=False))

    elif args.command == "archive":
        days = _parse_days(args.older_than)
        result = archive_old_notes(older_than_days=days, execute=args.execute)
        print(json.dumps(result, indent=2, ensure_ascii=False))
        if not args.execute and result["count"] > 0:
            print(f"\nDry run: {result['count']} files would be archived. Use --execute to proceed.", file=sys.stderr)

    elif args.command == "report":
        report, has_critical = generate_report()
        print(report)
        if has_critical:
            sys.exit(1)


if __name__ == "__main__":
    main()
Use --execute to proceed.", file=sys.stderr) + + elif args.command == "report": + report, has_critical = generate_report() + print(report) + if has_critical: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/cortex/roadmap.py b/cortex/roadmap.py new file mode 100644 index 0000000..4ddf63c --- /dev/null +++ b/cortex/roadmap.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +"""Roadmap Manager β€” track tasks, deadlines, dependencies, generate reports. + +Usage: + python3 roadmap.py list [--status open] [--priority P0,P1] [--tag security] + python3 roadmap.py add "title" --priority P1 --deadline 2026-02-19 --tag infra + python3 roadmap.py update --status done + python3 roadmap.py overdue + python3 roadmap.py upcoming [--days 7] + python3 roadmap.py report + python3 roadmap.py check-deps +""" + +import argparse +import json +import sys +import uuid +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any + +ROADMAP_FILE = Path.home() / "clawd" / "memory" / "roadmap.json" +VALID_STATUSES = {"open", "in_progress", "blocked", "done"} +VALID_PRIORITIES = {"P0", "P1", "P2", "P3"} + + +def load_roadmap(path: Path | None = None) -> dict: + p = path or ROADMAP_FILE + if p.exists(): + return json.loads(p.read_text()) + return {"items": []} + + +def save_roadmap(data: dict, path: Path | None = None): + p = path or ROADMAP_FILE + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(json.dumps(data, indent=2, ensure_ascii=False)) + + +def find_item(data: dict, item_id: str) -> dict | None: + for item in data["items"]: + if item["id"] == item_id or item["id"].startswith(item_id): + return item + return None + + +def now_iso() -> str: + return datetime.now().isoformat(timespec="seconds") + + +def parse_date(s: str | None) -> datetime | None: + if not s: + return None + for fmt in ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M", "%Y-%m-%d"]: + try: + return datetime.strptime(s, fmt) + except ValueError: + continue + return None + + +# --- 
def cmd_list(args, data: dict):
    """List items, optionally filtered by comma-separated --status/--priority/--tag."""
    items = data["items"]
    if args.status:
        statuses = set(args.status.split(","))
        items = [i for i in items if i["status"] in statuses]
    if args.priority:
        priorities = set(args.priority.split(","))
        items = [i for i in items if i["priority"] in priorities]
    if args.tag:
        # Any overlap between requested tags and item tags counts as a match.
        tags = set(args.tag.split(","))
        items = [i for i in items if tags & set(i.get("tags", []))]

    if not items:
        print("No items found.")
        return

    # Sort by priority then deadline
    # NOTE: with no filters this sorts data["items"] in place; harmless here
    # because nothing is saved by this command.
    prio_order = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}
    items.sort(key=lambda x: (prio_order.get(x["priority"], 9),
                              x.get("deadline") or "9999"))

    for i in items:
        deadline = i.get("deadline", "")
        dl_str = f" ⏰ {deadline}" if deadline else ""
        status_icon = {"open": "⬚", "in_progress": "πŸ”„", "blocked": "🚫", "done": "βœ…"
                       }.get(i["status"], "?")
        tags = " ".join(f"#{t}" for t in i.get("tags", []))
        print(f"  {status_icon} [{i['priority']}] {i['title']}{dl_str} {tags}")
        print(f"      id: {i['id'][:8]}")


def cmd_add(args, data: dict):
    """Create a new item with a fresh UUID and persist the roadmap."""
    item = {
        "id": str(uuid.uuid4()),
        "title": args.title,
        "status": "open",
        "priority": args.priority or "P2",
        "deadline": args.deadline,
        "depends_on": [],
        "tags": args.tag.split(",") if args.tag else [],
        "created": now_iso(),
        "updated": now_iso(),
        "notes": args.notes or "",
    }
    # Validate before the item is appended/saved.
    if item["priority"] not in VALID_PRIORITIES:
        print(f"Invalid priority: {item['priority']}")
        sys.exit(1)
    data["items"].append(item)
    save_roadmap(data)
    print(f"Added: {item['title']} (id: {item['id'][:8]})")


def cmd_update(args, data: dict):
    """Apply partial updates to an item found by id (or id prefix) and persist."""
    item = find_item(data, args.id)
    if not item:
        print(f"Item not found: {args.id}")
        sys.exit(1)
    if args.status:
        if args.status not in VALID_STATUSES:
            print(f"Invalid status: {args.status}")
            sys.exit(1)
        item["status"] = args.status
    if args.priority:
        item["priority"] = args.priority
    if args.deadline:
        item["deadline"] = args.deadline
    if args.notes:
        item["notes"] = args.notes
    if args.tag:
        # --tag replaces the whole tag list rather than appending.
        item["tags"] = args.tag.split(",")
    item["updated"] = now_iso()
    save_roadmap(data)
    print(f"Updated: {item['title']}")


def cmd_overdue(args, data: dict):
    """Print overdue not-done items; exit 1 when a P0/P1 is overdue (cron-friendly)."""
    now = datetime.now()
    overdue = []
    for item in data["items"]:
        if item["status"] == "done":
            continue
        dl = parse_date(item.get("deadline"))
        if dl and dl < now:
            overdue.append(item)

    if not overdue:
        print("No overdue items. βœ…")
        return

    prio_order = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}
    overdue.sort(key=lambda x: prio_order.get(x["priority"], 9))

    has_critical = any(i["priority"] in ("P0", "P1") for i in overdue)
    print(f"🚨 {len(overdue)} overdue item(s):")
    for i in overdue:
        print(f"  [{i['priority']}] {i['title']} β€” due {i['deadline']}")

    if has_critical:
        sys.exit(1)


def cmd_upcoming(args, data: dict):
    """Print not-done items whose deadline falls within the next --days days."""
    now = datetime.now()
    days = args.days or 7
    cutoff = now + timedelta(days=days)
    upcoming = []
    for item in data["items"]:
        if item["status"] == "done":
            continue
        dl = parse_date(item.get("deadline"))
        if dl and now <= dl <= cutoff:
            upcoming.append(item)

    if not upcoming:
        print(f"No items due in the next {days} days.")
        return

    upcoming.sort(key=lambda x: x.get("deadline", ""))
    print(f"πŸ“… {len(upcoming)} item(s) due in the next {days} days:")
    for i in upcoming:
        print(f"  [{i['priority']}] {i['title']} β€” due {i['deadline']}")


def cmd_report(args, data: dict):
    """Print a markdown status report: summary, overdue, upcoming, stale, in-progress."""
    now = datetime.now()
    lines = [
        f"# Roadmap Status Report",
        f"*Generated: {now.strftime('%Y-%m-%d %H:%M')}*",
        "",
    ]

    # Summary
    by_status = {}
    for item in data["items"]:
        by_status.setdefault(item["status"], []).append(item)

    lines.append("## Summary")
    for status in ["open", "in_progress", "blocked", "done"]:
        count = len(by_status.get(status, []))
        icon = {"open": "⬚", "in_progress": "πŸ”„", "blocked": "🚫", "done": "βœ…"}[status]
        lines.append(f"- {icon} {status}: {count}")
    lines.append("")

    # Overdue
    overdue = [i for i in data["items"] if i["status"] != "done"
               and parse_date(i.get("deadline")) and parse_date(i["deadline"]) < now]
    if overdue:
        lines.append("## 🚨 Overdue")
        for i in overdue:
            lines.append(f"- **[{i['priority']}]** {i['title']} β€” due {i['deadline']}")
        lines.append("")

    # Upcoming (7 days)
    cutoff = now + timedelta(days=7)
    upcoming = [i for i in data["items"] if i["status"] != "done"
                and parse_date(i.get("deadline"))
                and now <= parse_date(i["deadline"]) <= cutoff]
    if upcoming:
        lines.append("## πŸ“… Upcoming (7 days)")
        for i in sorted(upcoming, key=lambda x: x["deadline"]):
            lines.append(f"- **[{i['priority']}]** {i['title']} β€” due {i['deadline']}")
        lines.append("")

    # Stale items (in_progress > 7 days without update)
    stale = []
    for item in data["items"]:
        if item["status"] == "in_progress":
            updated = parse_date(item.get("updated"))
            if updated and (now - updated).days > 7:
                stale.append(item)
    if stale:
        lines.append("## ⏳ Stale (in_progress >7 days)")
        for i in stale:
            lines.append(f"- **[{i['priority']}]** {i['title']} β€” last updated {i['updated']}")
        lines.append("")

    # Active work
    active = by_status.get("in_progress", [])
    if active:
        lines.append("## πŸ”„ In Progress")
        for i in active:
            dl = f" (due {i['deadline']})" if i.get("deadline") else ""
            lines.append(f"- **[{i['priority']}]** {i['title']}{dl}")
        lines.append("")

    print("\n".join(lines))


def cmd_check_deps(args, data: dict):
    """Report dependencies of not-done items that are missing or not yet done."""
    id_status = {i["id"]: i for i in data["items"]}
    issues = []
    for item in data["items"]:
        if item["status"] == "done":
            continue
        for dep_id in item.get("depends_on", []):
            dep = id_status.get(dep_id)
            if not dep:
                issues.append((item, dep_id, "missing"))
            elif dep["status"] != "done":
                issues.append((item, dep_id, dep["status"]))

    if not issues:
        print("No dependency issues. βœ…")
        return

    print(f"⚠️ {len(issues)} dependency issue(s):")
    for item, dep_id, status in issues:
        if status == "missing":
            print(f"  [{item['priority']}] {item['title']} β†’ depends on unknown {dep_id[:8]}")
        else:
            dep = id_status[dep_id]
            print(f"  [{item['priority']}] {item['title']} β†’ blocked by '{dep['title']}' ({status})")
βœ…") + return + + print(f"⚠️ {len(issues)} dependency issue(s):") + for item, dep_id, status in issues: + if status == "missing": + print(f" [{item['priority']}] {item['title']} β†’ depends on unknown {dep_id[:8]}") + else: + dep = id_status[dep_id] + print(f" [{item['priority']}] {item['title']} β†’ blocked by '{dep['title']}' ({status})") + + +def main(): + parser = argparse.ArgumentParser(description="Roadmap Manager") + sub = parser.add_subparsers(dest="command") + + # list + p_list = sub.add_parser("list") + p_list.add_argument("--status", type=str) + p_list.add_argument("--priority", type=str) + p_list.add_argument("--tag", type=str) + + # add + p_add = sub.add_parser("add") + p_add.add_argument("title", type=str) + p_add.add_argument("--priority", type=str, default="P2") + p_add.add_argument("--deadline", type=str) + p_add.add_argument("--tag", type=str) + p_add.add_argument("--notes", type=str) + + # update + p_upd = sub.add_parser("update") + p_upd.add_argument("id", type=str) + p_upd.add_argument("--status", type=str) + p_upd.add_argument("--priority", type=str) + p_upd.add_argument("--deadline", type=str) + p_upd.add_argument("--tag", type=str) + p_upd.add_argument("--notes", type=str) + + # overdue + sub.add_parser("overdue") + + # upcoming + p_up = sub.add_parser("upcoming") + p_up.add_argument("--days", type=int, default=7) + + # report + sub.add_parser("report") + + # check-deps + sub.add_parser("check-deps") + + args = parser.parse_args() + if not args.command: + parser.print_help() + sys.exit(1) + + data = load_roadmap() + + cmd_map = { + "list": cmd_list, + "add": cmd_add, + "update": cmd_update, + "overdue": cmd_overdue, + "upcoming": cmd_upcoming, + "report": cmd_report, + "check-deps": cmd_check_deps, + } + cmd_map[args.command](args, data) + + +if __name__ == "__main__": + main() diff --git a/cortex/triage.py b/cortex/triage.py new file mode 100644 index 0000000..5fff978 --- /dev/null +++ b/cortex/triage.py @@ -0,0 +1,214 @@ +#!/usr/bin/env 
#!/usr/bin/env python3
"""Priority Triage System β€” score and rank tasks by urgency, importance, effort, readiness."""

import json
import re
import sys
from dataclasses import dataclass, asdict

# --- Keyword banks (DE + EN) ---
# NOTE: matching is plain substring matching on the lowered text, so short
# keys also fire inside longer words (e.g. "prod" matches "production",
# "p1" matches inside other tokens).  That is how the scorer behaves today.

URGENCY_KEYWORDS = {
    # word -> weight (0.0-1.0 contribution)
    "jetzt": 0.9, "sofort": 0.9, "asap": 0.8, "urgent": 0.8, "dringend": 0.8,
    "notfall": 1.0, "emergency": 1.0, "down": 0.9, "broken": 0.7, "kaputt": 0.7,
    "fehler": 0.6, "error": 0.6, "critical": 0.9, "alarm": 0.8, "outage": 0.9,
    "crash": 0.8, "timeout": 0.5, "offline": 0.7, "immediately": 0.8,
    "deadline": 0.7, "frist": 0.7, "heute": 0.5, "today": 0.5,
    "blocker": 0.8, "p0": 1.0, "p1": 0.8, "sev1": 0.9, "sev2": 0.7,
    "morgen": 0.6, "tomorrow": 0.6, "nΓ€chste woche": 0.5, "next week": 0.5,
    "fΓ€llig": 0.7, "overdue": 0.8, "ΓΌberfΓ€llig": 0.8,
    "beschwer": 0.6, "complaint": 0.6, "problem": 0.4,
}

IMPORTANCE_KEYWORDS = {
    "production": 0.9, "prod": 0.9, "security": 0.9, "sicherheit": 0.9,
    "geld": 0.8, "money": 0.8, "finance": 0.8, "finanzen": 0.8,
    "kunde": 0.7, "customer": 0.7, "kunden": 0.7, "bafin": 1.0,
    "regulatory": 0.9, "compliance": 0.9, "gateway": 0.7, "dns": 0.7,
    "albert": 0.6, "chef": 0.6, "boss": 0.6,
    "database": 0.6, "datenbank": 0.6, "backup": 0.7,
    "ssl": 0.7, "certificate": 0.7, "zertifikat": 0.7,
    "auth": 0.7, "login": 0.6, "password": 0.7, "passwort": 0.7,
    "data loss": 1.0, "datenverlust": 1.0,
    # Security keywords
    "breach": 0.95, "compromised": 0.9, "hacked": 0.95, "vulnerability": 0.8,
    "exploit": 0.9, "unauthorized": 0.85, "ssh keys": 0.85,
    # Compliance keywords ("compliance" and "regulatory" are defined once
    # above; the original literal repeated them, which Python silently
    # resolves by keeping only the last occurrence)
    "audit": 0.8, "iso": 0.7,
    "certification": 0.7, "zertifizierung": 0.7,
    # Business keywords
    "revenue": 0.8, "umsatz": 0.8, "zahlung": 0.7, "payment": 0.7,
    "beschwer": 0.7,
}

LOW_EFFORT_KEYWORDS = {
    "quick": 0.8, "schnell": 0.8, "einfach": 0.8, "simple": 0.8,
    "typo": 0.9, "tippfehler": 0.9, "config": 0.6, "toggle": 0.9,
    "restart": 0.7, "neustart": 0.7, "one-liner": 0.9, "fix": 0.5,
}

HIGH_EFFORT_KEYWORDS = {
    "komplett": 0.8, "refactor": 0.9, "migration": 0.9, "redesign": 0.9,
    "rewrite": 0.9, "umschreiben": 0.9, "architecture": 0.8, "architektur": 0.8,
    "von grund auf": 0.9, "from scratch": 0.9, "multi-step": 0.6,
    "complex": 0.7, "komplex": 0.7, "projekt": 0.5, "project": 0.5,
}

NOT_READY_KEYWORDS = {
    "blocked": 0.9, "blockiert": 0.9, "warte auf": 0.8, "waiting for": 0.8,
    "depends on": 0.8, "abhΓ€ngig von": 0.8, "brauche erst": 0.8, "need first": 0.8,
    "pending": 0.6, "ausstehend": 0.6, "not yet": 0.5, "noch nicht": 0.5,
}


def _scan_keywords(text: str, keywords: dict[str, float]) -> float:
    """Scan *text* for keywords; return a 0.0-1.0 score.

    The strongest hit sets the base score; every further hit adds 15% of
    its own weight (diminishing returns), capped at 1.0.
    """
    t = text.lower()
    hits = []
    for kw, weight in keywords.items():
        if kw in t:
            hits.append(weight)
    if not hits:
        return 0.0
    hits.sort(reverse=True)
    score = hits[0]
    for h in hits[1:]:
        score = min(1.0, score + h * 0.15)  # diminishing returns
    return round(min(1.0, score), 3)


@dataclass
class TriageScore:
    """Per-task scoring result; all components are rounded to 3 decimals."""
    text: str
    urgency: float
    importance: float
    effort: float      # 0=trivial, 1=massive
    readiness: float   # 1=ready, 0=blocked
    priority: float

    def to_dict(self):
        d = asdict(self)
        d["priority"] = round(d["priority"], 3)
        return d


def score_task(text: str) -> TriageScore:
    """Score a single task/message on urgency, importance, effort and readiness."""
    urgency = _scan_keywords(text, URGENCY_KEYWORDS)
    importance = _scan_keywords(text, IMPORTANCE_KEYWORDS)

    low_effort = _scan_keywords(text, LOW_EFFORT_KEYWORDS)
    high_effort = _scan_keywords(text, HIGH_EFFORT_KEYWORDS)
    # effort: 0=trivial, 1=massive.  Default 0.5 (medium); whichever effort
    # signal is stronger pulls the estimate away from the midpoint.
    if low_effort > high_effort:
        effort = max(0.1, 0.5 - low_effort * 0.4)
    elif high_effort > low_effort:
        effort = min(1.0, 0.5 + high_effort * 0.4)
    else:
        effort = 0.5

    not_ready = _scan_keywords(text, NOT_READY_KEYWORDS)
    readiness = round(1.0 - not_ready, 3)

    # priority = 0.4*urgency + 0.35*importance + 0.15*(1 - effort) + 0.1*readiness
    # Low effort should *raise* priority, hence the simple inversion below.
    # (An earlier comment described a 1/effort formula that was never what
    # the code computed.)
    effort_inv = round(1.0 - effort, 3)
    priority = (urgency * 0.4) + (importance * 0.35) + (effort_inv * 0.15) + (readiness * 0.1)

    return TriageScore(
        text=text,
        urgency=round(urgency, 3),
        importance=round(importance, 3),
        effort=round(effort, 3),
        readiness=readiness,
        priority=round(priority, 3),
    )


def rank_tasks(texts: list[str]) -> list[TriageScore]:
    """Score and rank multiple tasks by priority (highest first)."""
    scores = [score_task(t) for t in texts]
    scores.sort(key=lambda s: s.priority, reverse=True)
    return scores


def analyze_message(text: str) -> dict:
    """Deep analysis of a message β€” return the score, matched signals and a label."""
    score = score_task(text)
    t = text.lower()

    signals = {"urgency": [], "importance": [], "effort": [], "readiness": []}
    for kw in URGENCY_KEYWORDS:
        if kw in t:
            signals["urgency"].append(kw)
    for kw in IMPORTANCE_KEYWORDS:
        if kw in t:
            signals["importance"].append(kw)
    for kw in LOW_EFFORT_KEYWORDS:
        if kw in t:
            signals["effort"].append(f"{kw} (low)")
    for kw in HIGH_EFFORT_KEYWORDS:
        if kw in t:
            signals["effort"].append(f"{kw} (high)")
    for kw in NOT_READY_KEYWORDS:
        if kw in t:
            signals["readiness"].append(f"{kw} (blocker)")

    # Classify by priority bands.
    p = score.priority
    if p >= 0.8:
        classification = "πŸ”΄ CRITICAL β€” handle immediately"
    elif p >= 0.6:
        classification = "🟠 HIGH β€” handle soon"
    elif p >= 0.4:
        classification = "🟑 MEDIUM β€” schedule"
    elif p >= 0.2:
        classification = "πŸ”΅ LOW β€” backlog"
    else:
        classification = "βšͺ MINIMAL β€” ignore"

    return {
        "score": score.to_dict(),
        "signals": signals,
        "classification": classification,
    }


def main():
    """Tiny CLI: score/rank/analyze one or more task strings as JSON."""
    if len(sys.argv) < 2:
        print("Usage: python3 triage.py <score|rank|analyze>")
        sys.exit(1)

    cmd = sys.argv[1]

    if cmd == "score":
        if len(sys.argv) < 3:
            print("Usage: python3 triage.py score \"task description\"")
            sys.exit(1)
        result = score_task(sys.argv[2])
        print(json.dumps(result.to_dict(), indent=2, ensure_ascii=False))

    elif cmd == "rank":
        if len(sys.argv) < 3:
            print("Usage: python3 triage.py rank \"task1\" \"task2\" ...")
            sys.exit(1)
        tasks = sys.argv[2:]
        ranked = rank_tasks(tasks)
        output = [{"rank": i + 1, **s.to_dict()} for i, s in enumerate(ranked)]
        print(json.dumps(output, indent=2, ensure_ascii=False))

    elif cmd == "analyze":
        if len(sys.argv) < 3:
            print("Usage: python3 triage.py analyze \"message\"")
            sys.exit(1)
        result = analyze_message(sys.argv[2])
        print(json.dumps(result, indent=2, ensure_ascii=False))

    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)


if __name__ == "__main__":
    main()
+ +Usage: + python3 validate_output.py --transcript /path/to/session.jsonl --task "description" + python3 validate_output.py --session-key "agent:main:subagent:xxx" +""" + +import argparse +import ast +import json +import os +import re +import subprocess +import sys +from pathlib import Path +from typing import Any + +DEFAULT_SESSIONS_DIR = Path(os.path.expanduser("~/.openclaw/agents/main/sessions")) + + +def parse_jsonl(path: Path) -> list[dict]: + entries = [] + with open(path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + entries.append(json.loads(line)) + except json.JSONDecodeError: + pass + return entries + + +def find_session_by_key(session_key: str) -> Path | None: + """Find a session JSONL by its session key (searches session headers).""" + sessions_dir = DEFAULT_SESSIONS_DIR + if not sessions_dir.exists(): + return None + # session key format: agent:main:subagent:UUID β€” extract UUID + parts = session_key.split(":") + uuid_part = parts[-1] if parts else session_key + candidate = sessions_dir / f"{uuid_part}.jsonl" + if candidate.exists(): + return candidate + # Search all files + for f in sessions_dir.glob("*.jsonl"): + try: + with open(f) as fh: + first_line = fh.readline().strip() + if first_line: + header = json.loads(first_line) + if header.get("id") == uuid_part or session_key in json.dumps(header): + return f + except (json.JSONDecodeError, OSError): + pass + return None + + +def extract_mentioned_files(task: str) -> list[str]: + """Extract file paths/names mentioned in a task description.""" + patterns = [ + r'`([^`]+\.\w{1,5})`', # `filename.ext` + r'(\S+\.(?:py|ts|js|json|md|yaml|yml|toml|sh))', # bare filenames + ] + files = set() + for pat in patterns: + for m in re.finditer(pat, task): + f = m.group(1) + if "/" not in f and not f.startswith("."): + files.add(f) + elif f.startswith("~/") or f.startswith("./") or f.startswith("/"): + files.add(f) + return list(files) + + +def 
extract_created_files_from_transcript(entries: list[dict]) -> list[str]: + """Extract files that were written/created during the session.""" + files = set() + for entry in entries: + msg = entry.get("message", {}) + content = msg.get("content", []) + if isinstance(content, list): + for item in content: + if not isinstance(item, dict): + continue + # Tool calls to write/edit + if item.get("type") == "toolCall": + name = item.get("name", "") + args = item.get("arguments", {}) + if isinstance(args, str): + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {} + if name in ("write", "Write"): + fp = args.get("file_path") or args.get("path", "") + if fp: + files.add(fp) + elif name in ("edit", "Edit"): + fp = args.get("file_path") or args.get("path", "") + if fp: + files.add(fp) + # Also check exec commands for file creation + if isinstance(content, list): + for item in content: + if isinstance(item, dict) and item.get("type") == "toolCall": + if item.get("name") in ("exec", "Exec"): + args = item.get("arguments", {}) + if isinstance(args, str): + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {} + cmd = args.get("command", "") + # Detect mkdir, touch, tee, etc. 
+ for m in re.finditer(r'(?:tee|>)\s+(\S+)', cmd): + files.add(m.group(1)) + return list(files) + + +def extract_transcript_errors(entries: list[dict]) -> list[str]: + """Extract unaddressed error messages from transcript.""" + errors = [] + for entry in entries: + msg = entry.get("message", {}) + if msg.get("isError"): + content = msg.get("content", []) + if isinstance(content, list): + for item in content: + if isinstance(item, dict): + errors.append(item.get("text", "unknown error")[:200]) + elif isinstance(content, str): + errors.append(content[:200]) + return errors + + +def check_test_results(entries: list[dict]) -> dict: + """Scan transcript for test execution results.""" + result = {"tests_found": False, "tests_passed": None, "details": ""} + for entry in entries: + msg = entry.get("message", {}) + content = msg.get("content", []) + if isinstance(content, list): + for item in content: + text = "" + if isinstance(item, dict): + text = item.get("text", "") + if not text: + continue + # pytest output + if re.search(r'\d+ passed', text): + result["tests_found"] = True + if "failed" in text.lower(): + result["tests_passed"] = False + result["details"] = text[:200] + else: + result["tests_passed"] = True + result["details"] = text[:200] + # unittest output + if re.search(r'OK\s*$', text.strip()): + result["tests_found"] = True + result["tests_passed"] = True + if re.search(r'FAILED', text): + result["tests_found"] = True + result["tests_passed"] = False + result["details"] = text[:200] + return result + + +class ValidationReport: + def __init__(self): + self.checks: list[dict] = [] + self.passed = 0 + self.failed = 0 + self.warnings = 0 + + def add(self, name: str, status: str, detail: str = ""): + """status: pass, fail, warn""" + self.checks.append({"name": name, "status": status, "detail": detail}) + if status == "pass": + self.passed += 1 + elif status == "fail": + self.failed += 1 + else: + self.warnings += 1 + + @property + def ok(self) -> bool: + return 
self.failed == 0 + + def to_json(self) -> dict: + return { + "passed": self.passed, + "failed": self.failed, + "warnings": self.warnings, + "ok": self.ok, + "checks": self.checks, + } + + def to_human(self) -> str: + lines = [f"Validation Report: {'βœ… PASS' if self.ok else '❌ FAIL'}", + f" {self.passed} passed, {self.failed} failed, {self.warnings} warnings", ""] + for c in self.checks: + icon = {"pass": "βœ…", "fail": "❌", "warn": "⚠️"}.get(c["status"], "?") + line = f" {icon} {c['name']}" + if c["detail"]: + line += f" β€” {c['detail']}" + lines.append(line) + return "\n".join(lines) + + +def validate_file_exists(path: str) -> tuple[bool, str]: + """Check if file exists and is non-empty.""" + p = Path(os.path.expanduser(path)) + if not p.exists(): + return False, f"File not found: {path}" + if p.stat().st_size == 0: + return False, f"File is empty: {path}" + return True, f"Exists, {p.stat().st_size} bytes" + + +def validate_python_syntax(path: str) -> tuple[bool, str]: + """Check Python file for syntax errors.""" + p = Path(os.path.expanduser(path)) + if not p.exists(): + return False, "File not found" + try: + source = p.read_text() + compile(source, str(p), "exec") + return True, "Syntax OK" + except SyntaxError as e: + return False, f"Syntax error: {e}" + + +def validate_typescript_structure(path: str) -> tuple[bool, str]: + """Basic TypeScript validation β€” check for exports, no excessive 'any'.""" + p = Path(os.path.expanduser(path)) + if not p.exists(): + return False, "File not found" + content = p.read_text() + issues = [] + any_count = len(re.findall(r'\bany\b', content)) + if any_count > 10: + issues.append(f"Excessive 'any' usage ({any_count} occurrences)") + if not re.search(r'\bexport\b', content): + issues.append("No exports found") + if issues: + return False, "; ".join(issues) + return True, "Structure OK" + + +def validate_staging_location(files: list[str], staging_dir: str = "staging") -> list[tuple[str, bool, str]]: + """Check if files are in 
the staging directory.""" + results = [] + for f in files: + expanded = os.path.expanduser(f) + in_staging = staging_dir in expanded + results.append((f, in_staging, "In staging" if in_staging else "Not in staging dir")) + return results + + +def run_validation(transcript_path: Path, task: str) -> ValidationReport: + """Main validation logic.""" + report = ValidationReport() + entries = parse_jsonl(transcript_path) + + if not entries: + report.add("Transcript readable", "fail", "No entries found") + return report + report.add("Transcript readable", "pass", f"{len(entries)} entries") + + # Check mentioned files exist + mentioned = extract_mentioned_files(task) + created = extract_created_files_from_transcript(entries) + all_files = list(set(mentioned + created)) + + for f in all_files: + exists, detail = validate_file_exists(f) + report.add(f"File exists: {Path(f).name}", "pass" if exists else "fail", detail) + + # Syntax check Python files + for f in all_files: + if f.endswith(".py"): + ok, detail = validate_python_syntax(f) + report.add(f"Python syntax: {Path(f).name}", "pass" if ok else "fail", detail) + + # Syntax check TypeScript files + for f in all_files: + if f.endswith(".ts") or f.endswith(".tsx"): + ok, detail = validate_typescript_structure(f) + report.add(f"TS structure: {Path(f).name}", "pass" if ok else "fail", detail) + + # Check staging location if mentioned in task + if "staging" in task.lower(): + for f in all_files: + expanded = os.path.expanduser(f) + in_staging = "staging" in expanded + report.add(f"In staging: {Path(f).name}", + "pass" if in_staging else "warn", + "In staging dir" if in_staging else "Not in staging dir") + + # Check test results if tests were requested + if any(w in task.lower() for w in ["test", "tests", "pytest", "unittest"]): + test_results = check_test_results(entries) + if test_results["tests_found"]: + if test_results["tests_passed"]: + report.add("Tests passed", "pass", test_results["details"][:100]) + else: + 
report.add("Tests passed", "fail", test_results["details"][:100]) + else: + report.add("Tests executed", "warn", "No test output found in transcript") + + # Check for unaddressed errors + errors = extract_transcript_errors(entries) + if errors: + report.add("Unaddressed errors", "warn", f"{len(errors)} error(s) in transcript") + else: + report.add("No unaddressed errors", "pass") + + return report + + +def main(): + parser = argparse.ArgumentParser(description="Sub-Agent Output Validator") + parser.add_argument("--transcript", help="Path to session JSONL file") + parser.add_argument("--session-key", help="Session key (e.g. agent:main:subagent:xxx)") + parser.add_argument("--task", default="", help="Original task description") + parser.add_argument("--json", action="store_true", help="Output JSON only") + args = parser.parse_args() + + if not args.transcript and not args.session_key: + parser.error("Either --transcript or --session-key is required") + + if args.session_key: + path = find_session_by_key(args.session_key) + if not path: + print(f"Session not found: {args.session_key}", file=sys.stderr) + sys.exit(1) + else: + path = Path(args.transcript) + if not path.exists(): + print(f"File not found: {args.transcript}", file=sys.stderr) + sys.exit(1) + + report = run_validation(path, args.task) + + if args.json: + print(json.dumps(report.to_json(), indent=2)) + else: + print(report.to_human()) + print() + print(json.dumps(report.to_json(), indent=2)) + + sys.exit(0 if report.ok else 1) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e534ef0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = ["setuptools>=68.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "cortex" +version = "0.1.0" +description = "Intelligence layer for OpenClaw β€” triage, health, feedback, memory hygiene, roadmap, validation" +readme = "README.md" +requires-python = 
">=3.11"
+license = {text = "MIT"}
+authors = [
+    {name = "Claudia & Albert", email = "hildalbert@gmail.com"},
+]
+
+[project.scripts]
+cortex = "cortex.cli:main"
+
+[tool.setuptools.packages.find]
+include = ["cortex*"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/tests/test_enhancements.py b/tests/test_enhancements.py
new file mode 100755
index 0000000..7219aaa
--- /dev/null
+++ b/tests/test_enhancements.py
@@ -0,0 +1,303 @@
#!/usr/bin/env python3
"""
Tests for memory enhancement components.
Run: python3 -m pytest test_enhancements.py -v
 or: python3 test_enhancements.py
"""

import json
import sys
import time
import unittest
from datetime import date, timedelta
from pathlib import Path

# Ensure our modules are importable
sys.path.insert(0, str(Path(__file__).parent))

# NOTE(review): `json` and `IntentResult` appear unused in this file β€” confirm
# before removing.
from cortex.composite_scorer import (
    SearchResult, extract_date, recency_score, source_weight,
    multi_term_confidence, score_results,
)
from cortex.intent_classifier import classify, reset_cache, IntentResult
from cortex.auto_handoff import extract_markers, format_handoff, generate_handoff


class TestCompositeScorer(unittest.TestCase):
    """Tests for composite_scorer.py"""

    def test_extract_date_from_path(self):
        r = SearchResult(text="test", source_path="memory/2026-02-07.md")
        self.assertEqual(extract_date(r), date(2026, 2, 7))

    def test_extract_date_from_content(self):
        r = SearchResult(text="On 2026-01-15 we decided to...", source_path="MEMORY.md")
        self.assertEqual(extract_date(r), date(2026, 1, 15))

    def test_extract_date_none(self):
        r = SearchResult(text="no date here", source_path="README.md")
        self.assertIsNone(extract_date(r))

    def test_recency_today(self):
        today = date.today()
        score = recency_score(today, today)
        self.assertAlmostEqual(score, 1.0, places=2)

    def test_recency_half_life(self):
        # At exactly one half-life of age the score should have decayed to 0.5.
        today = date.today()
        half_life = 14
        old = today - timedelta(days=half_life)
        score = recency_score(old, today, half_life)
        self.assertAlmostEqual(score, 0.5, places=2)

    def test_recency_very_old(self):
        today = date.today()
        old = today - timedelta(days=365)
        score = recency_score(old, today, 14)
        self.assertLess(score, 0.01)

    def test_recency_no_date(self):
        # Undated results get a fixed neutral score rather than zero.
        score = recency_score(None)
        self.assertEqual(score, 0.3)

    def test_source_weight_memory_md(self):
        w = source_weight("MEMORY.md", {"MEMORY.md": 1.0, "default": 0.4})
        self.assertEqual(w, 1.0)

    def test_source_weight_daily_note(self):
        weights = {"memory/": 0.7, "default": 0.4}
        w = source_weight("memory/2026-02-07.md", weights)
        self.assertEqual(w, 0.7)

    def test_source_weight_default(self):
        w = source_weight("random/file.txt", {"MEMORY.md": 1.0, "default": 0.4})
        self.assertEqual(w, 0.4)

    def test_multi_term_all_match(self):
        score = multi_term_confidence("gateway fix watchdog", "The gateway fix for watchdog issue")
        self.assertAlmostEqual(score, 1.0)

    def test_multi_term_partial(self):
        score = multi_term_confidence("gateway fix watchdog", "The gateway is running fine")
        self.assertAlmostEqual(score, 1/3, places=2)

    def test_multi_term_none(self):
        score = multi_term_confidence("gateway fix", "completely unrelated text")
        self.assertEqual(score, 0.0)

    def test_multi_term_empty_query(self):
        score = multi_term_confidence("", "some text")
        self.assertEqual(score, 0.5)

    def test_score_results_ordering(self):
        """Recent high-source results should rank above old high-search-score results."""
        today = date.today()
        recent_date = today.strftime("%Y-%m-%d")
        old_date = (today - timedelta(days=90)).strftime("%Y-%m-%d")

        results = [
            SearchResult(text="old but high match", source_path=f"memory/{old_date}.md", original_score=0.95),
            SearchResult(text="recent in MEMORY", source_path="MEMORY.md", original_score=0.7,
                         metadata={"date": recent_date}),
        ]
        scored = score_results(results, query="test query", reference_date=today)
        # MEMORY.md with recent date should rank higher due to source + recency
        self.assertEqual(scored[0].source_path, "MEMORY.md")

    def test_score_results_empty(self):
        self.assertEqual(score_results([], query="test"), [])

    def test_scoring_performance(self):
        """Composite scoring should be <10ms for 50 results."""
        # NOTE(review): wall-clock assertion β€” may flake on loaded CI hosts.
        results = [
            SearchResult(text=f"Result {i} about gateway and NATS",
                         source_path=f"memory/2026-01-{i%28+1:02d}.md",
                         original_score=0.5 + (i % 10) / 20)
            for i in range(50)
        ]
        start = time.perf_counter()
        score_results(results, query="gateway NATS")
        elapsed_ms = (time.perf_counter() - start) * 1000
        self.assertLess(elapsed_ms, 10, f"Scoring took {elapsed_ms:.1f}ms, should be <10ms")


class TestIntentClassifier(unittest.TestCase):
    """Tests for intent_classifier.py"""

    def setUp(self):
        reset_cache()

    def test_who_query_english(self):
        r = classify("Albert Hild contact")
        self.assertEqual(r.intent, "WHO")

    def test_who_query_german(self):
        r = classify("wer ist Sebastian Baier")
        self.assertEqual(r.intent, "WHO")

    def test_when_query(self):
        r = classify("when did we fix the gateway")
        self.assertEqual(r.intent, "WHEN")

    def test_when_query_german(self):
        r = classify("wann wurde TypeDB eingerichtet")
        self.assertEqual(r.intent, "WHEN")

    def test_why_query(self):
        r = classify("why did we choose NATS over Kafka")
        self.assertEqual(r.intent, "WHY")

    def test_why_query_german(self):
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")

    def test_what_query(self):
        r = classify("Mondo Gate regulatory status")
        self.assertEqual(r.intent, "WHAT")

    def test_empty_query(self):
        # Empty input falls back to WHAT with low confidence.
        r = classify("")
        self.assertEqual(r.intent, "WHAT")
        self.assertLess(r.confidence, 0.5)

    def test_mixed_language(self):
        r = classify("who is the Ansprechpartner for Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_classification_speed(self):
        """Intent classification must be <5ms."""
        # NOTE(review): wall-clock assertion β€” may flake on loaded CI hosts.
        queries = [
            "Albert Hild contact", "when did we fix the gateway",
            "why NATS over Kafka", "infrastructure status",
            "wer ist bei Vainplex", "wann Viola fix",
        ]
        for q in queries:
            r = classify(q)
            self.assertLess(r.classification_ms, 5.0,
                            f"Classification of '{q}' took {r.classification_ms:.2f}ms")

    def test_capitalized_names_boost_who(self):
        r = classify("Sebastian Baier Mondo Gate")
        self.assertEqual(r.intent, "WHO")

    def test_date_tokens_boost_when(self):
        r = classify("watchdog incident February")
        self.assertEqual(r.intent, "WHEN")

    def test_returns_weight_adjustments(self):
        r = classify("warum ChromaDB statt Pinecone")
        self.assertEqual(r.intent, "WHY")
        # WHY should have weight adjustments for MEMORY.md boost
        self.assertIsInstance(r.weight_adjustments, dict)


class TestAutoHandoff(unittest.TestCase):
    """Tests for auto_handoff.py"""

    # Synthetic daily note exercising every marker kind the extractor knows.
    SAMPLE_CONTENT = """# Session 2026-02-07

## Infrastructure Work
- Fixed gateway watchdog issue
- DECISION: Use NATS instead of Kafka for event streaming
- TODO: Set up monitoring for new NATS cluster
- BLOCKED: Waiting for DNS propagation for new domain

## Open Items
- QUESTION: Should we migrate old events to new format?
- [ ] Update documentation
- [ ] Run integration tests

DECISION: Switch Mona to Opus model for better reasoning
TODO: Benchmark Opus vs Sonnet for our workload
"""

    def test_extract_decisions(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["decisions"]), 2)
        self.assertTrue(any("NATS" in d for d in result["decisions"]))

    def test_extract_todos(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["todos"]), 3)  # 2 explicit + 2 checkboxes

    def test_extract_blocked(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["blocked"]), 1)
        self.assertTrue(any("DNS" in b for b in result["blocked"]))

    def test_extract_questions(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result["questions"]), 1)

    def test_extract_headings(self):
        result = extract_markers(self.SAMPLE_CONTENT)
        self.assertGreaterEqual(len(result.get("key_context", [])), 1)

    def test_format_handoff(self):
        extracted = {
            "decisions": ["Use NATS"],
            "todos": ["Set up monitoring"],
            "blocked": ["DNS propagation"],
            "questions": ["Migrate events?"],
        }
        md = format_handoff(extracted, title="Test Handoff")
        self.assertIn("# Test Handoff", md)
        self.assertIn("βœ… Decisions", md)
        self.assertIn("Use NATS", md)
        self.assertIn("πŸ“‹ Next Steps", md)

    def test_format_empty(self):
        md = format_handoff({})
        self.assertIn("Session Handoff", md)

    def test_generate_handoff_regex(self):
        result = generate_handoff(self.SAMPLE_CONTENT, source="test.md")
        self.assertIn("NATS", result)
        self.assertIn("DNS", result)

    def test_real_daily_note(self):
        """Test with a real daily note if available."""
        # NOTE(review): environment-dependent β€” silently becomes a no-op when
        # the file is absent (e.g. on CI).
        note_path = Path.home() / "clawd" / "memory" / "2026-02-08.md"
        if note_path.exists():
            content = note_path.read_text()
            result = extract_markers(content)
            # Should at least extract headings
            self.assertIsInstance(result, dict)
            self.assertIn("key_context", result)


class TestIntegration(unittest.TestCase):
    """Integration tests for the full pipeline."""
    # NOTE(review): these hit the real memory/ directory of the host β€” confirm
    # they are intended to run outside a developer workstation.

    def test_file_search_finds_results(self):
        """File search should find results in memory/."""
        from cortex.enhanced_search import search_files
        results = search_files("gateway")
        # Should find at least something in memory files
        self.assertIsInstance(results, list)

    def test_enhanced_search_pipeline(self):
        """Full pipeline should run without errors."""
        from cortex.enhanced_search import enhanced_search
        result = enhanced_search("gateway", use_unified=False, max_results=5)
        self.assertIn("query", result)
        self.assertIn("intent", result)
        self.assertIn("results", result)
        self.assertIn("timing", result)
        self.assertEqual(result["intent"]["type"], "WHAT")

    def test_pipeline_who_query(self):
        from cortex.enhanced_search import enhanced_search
        result = enhanced_search("Albert Hild contact", use_unified=False, max_results=5)
        self.assertEqual(result["intent"]["type"], "WHO")

    def test_pipeline_timing(self):
        """Full pipeline without unified should be fast."""
        from cortex.enhanced_search import enhanced_search
        result = enhanced_search("test query", use_unified=False, max_results=5)
        # Should complete in reasonable time (< 2 seconds for file search)
        self.assertLess(result["timing"]["total_ms"], 2000)


if __name__ == "__main__":
    unittest.main(verbosity=2)
diff --git a/tests/test_intelligence.py b/tests/test_intelligence.py
new file mode 100644
index 0000000..c2645b5
--- /dev/null
+++ b/tests/test_intelligence.py
@@ -0,0 +1,341 @@
#!/usr/bin/env python3
"""Tests for triage.py and memory_hygiene.py β€” 35+ tests."""

# NOTE(review): `json` and `unittest.mock.patch` appear unused in this file.
import json
import os
import shutil
import sys
import tempfile
import unittest
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch

sys.path.insert(0, os.path.dirname(__file__))
from cortex.triage import score_task, rank_tasks, analyze_message, TriageScore
import cortex.memory_hygiene as mh


class TestTriageUrgency(unittest.TestCase):
    """Test urgency scoring dimension."""

    def test_high_urgency_german(self):
        s = score_task("Das muss JETZT sofort gefixt werden!")
        self.assertGreater(s.urgency, 0.7)

    def test_high_urgency_english(self):
        s = score_task("URGENT: system is down, critical error!")
        self.assertGreater(s.urgency, 0.7)

    def test_low_urgency(self):
        s = score_task("Update the documentation when you have time")
        self.assertLess(s.urgency, 0.3)

    def test_emergency(self):
        s = score_task("Notfall! Alles kaputt!")
        self.assertGreater(s.urgency, 0.8)

    def test_deadline_urgency(self):
        s = score_task("Deadline today, needs to be done ASAP")
        self.assertGreater(s.urgency, 0.5)

    def test_no_urgency(self):
        s = score_task("Nice to have feature for later")
        self.assertEqual(s.urgency, 0.0)


class TestTriageImportance(unittest.TestCase):
    """Test importance scoring dimension."""

    def test_production_security(self):
        s = score_task("Production security breach detected")
        self.assertGreater(s.importance, 0.7)

    def test_customer_money(self):
        s = score_task("Kunde verliert Geld wegen Fehler")
        self.assertGreater(s.importance, 0.6)

    def test_regulatory(self):
        s = score_task("BaFin compliance audit next week")
        self.assertGreater(s.importance, 0.8)

    def test_low_importance(self):
        s = score_task("Change button color on internal tool")
        self.assertLess(s.importance, 0.3)

    def test_gateway_dns(self):
        s = score_task("Gateway DNS resolution failing")
        self.assertGreater(s.importance, 0.5)

    def test_albert_request(self):
        s = score_task("Albert wants this done")
        self.assertGreater(s.importance, 0.4)


class TestTriageEffort(unittest.TestCase):
    """Test effort estimation."""

    def test_quick_fix(self):
        s = score_task("Quick fix: change config value")
        self.assertLess(s.effort, 0.4)

    def test_major_refactor(self):
        s = score_task("Complete refactor of the migration system")
        self.assertGreater(s.effort, 0.6)

    def test_simple_german(self):
        s = score_task("Einfach schnell den Typo fixen")
        self.assertLess(s.effort, 0.3)

    def test_default_effort(self):
        # Text with no effort cues should land on the neutral default.
        s = score_task("Do something")
        self.assertAlmostEqual(s.effort, 0.5, places=1)


class TestTriageReadiness(unittest.TestCase):
    """Test readiness/dependency detection."""

    def test_blocked(self):
        s = score_task("Blocked: waiting for API key from vendor")
        self.assertLess(s.readiness, 0.5)

    def test_german_blocked(self):
        s = score_task("Brauche erst die Credentials, warte auf Albert")
        self.assertLess(s.readiness, 0.5)

    def test_ready(self):
        s = score_task("Fix the login page CSS")
        self.assertEqual(s.readiness, 1.0)

    def test_depends_on(self):
        s = score_task("Depends on database migration finishing first")
        self.assertLess(s.readiness, 0.5)


class TestTriagePriority(unittest.TestCase):
    """Test overall priority scoring."""

    def test_critical_scores_high(self):
        s = score_task("URGENT: Production is down! Critical security breach!")
        self.assertGreater(s.priority, 0.6)

    def test_blocked_lowers_priority(self):
        ready = score_task("Fix production error")
        blocked = score_task("Fix production error, blocked by vendor")
        self.assertGreater(ready.priority, blocked.priority)

    def test_easy_beats_hard_all_else_equal(self):
        easy = score_task("Quick config fix for gateway")
        hard = score_task("Complete refactor redesign for gateway")
        # Both have gateway importance, but effort differs
        self.assertGreater(easy.priority, hard.priority)


class TestTriageRanking(unittest.TestCase):
    """Test ranking multiple tasks."""

    def test_rank_order(self):
        ranked = rank_tasks([
            "Write docs",
            "URGENT: Production down!",
            "Update OpenClaw config",
        ])
        self.assertEqual(ranked[0].text, "URGENT: Production down!")
        self.assertEqual(len(ranked), 3)

    def test_rank_preserves_all(self):
        tasks = ["a", "b", "c", "d"]
        ranked = rank_tasks(tasks)
        self.assertEqual(len(ranked), 4)

    def test_rank_descending(self):
        ranked = rank_tasks(["Fix docs", "CRITICAL production down", "Nice to have"])
        for i in range(len(ranked) - 1):
            self.assertGreaterEqual(ranked[i].priority, ranked[i + 1].priority)


class TestTriageAnalyze(unittest.TestCase):
    """Test message analysis."""

    def test_analyze_returns_signals(self):
        r = analyze_message("JETZT sofort Production fixen!")
        self.assertIn("jetzt", r["signals"]["urgency"])
        self.assertIn("sofort", r["signals"]["urgency"])
        self.assertIn("production", r["signals"]["importance"])

    def test_analyze_classification_critical(self):
        r = analyze_message("URGENT critical production security down!")
        self.assertIn("CRITICAL", r["classification"])

    def test_analyze_classification_low(self):
        r = analyze_message("Maybe someday update the readme")
        self.assertIn("LOW", r["classification"])


class TestMemoryDuplicates(unittest.TestCase):
    """Test duplicate detection."""

    def setUp(self):
        # Point the module at a throwaway directory; restored in tearDown.
        self.tmpdir = Path(tempfile.mkdtemp())
        self.orig_dir = mh.MEMORY_DIR
        mh.MEMORY_DIR = self.tmpdir

    def tearDown(self):
        mh.MEMORY_DIR = self.orig_dir
        shutil.rmtree(self.tmpdir)

    def test_finds_exact_duplicates(self):
        para = "This is a sufficiently long paragraph that should be detected as duplicate content across files."
        (self.tmpdir / "a.md").write_text(f"# A\n\n{para}\n\nOther stuff.")
        (self.tmpdir / "b.md").write_text(f"# B\n\n{para}\n\nDifferent stuff.")
        dupes = mh.find_duplicates(min_length=20)
        self.assertGreater(len(dupes), 0)

    def test_no_false_positives(self):
        (self.tmpdir / "a.md").write_text("# Unique content A\n\nCompletely different paragraph about apples.")
        (self.tmpdir / "b.md").write_text("# Unique content B\n\nTotally unrelated text about oranges.")
        dupes = mh.find_duplicates(min_length=20)
        self.assertEqual(len(dupes), 0)

    def test_short_paragraphs_ignored(self):
        (self.tmpdir / "a.md").write_text("Hi\n\nHi")
        (self.tmpdir / "b.md").write_text("Hi\n\nHi")
        dupes = mh.find_duplicates(min_length=50)
        self.assertEqual(len(dupes), 0)


class TestMemoryStaleness(unittest.TestCase):
    """Test staleness detection."""

    def setUp(self):
        self.tmpdir = Path(tempfile.mkdtemp())
        self.orig_dir = mh.MEMORY_DIR
        mh.MEMORY_DIR = self.tmpdir

    def tearDown(self):
        mh.MEMORY_DIR = self.orig_dir
        shutil.rmtree(self.tmpdir)

    def test_old_todo(self):
        f = self.tmpdir / "notes.md"
        f.write_text("# Notes\n\nTODO: fix this thing\n")
        # Set mtime to 60 days ago
        old = (datetime.now() - timedelta(days=60)).timestamp()
        os.utime(f, (old, old))
        stale = mh.find_stale()
        reasons = [s["reason"] for s in stale]
        self.assertTrue(any("TODO" in r for r in reasons))

    def test_old_date(self):
        f = self.tmpdir / "info.md"
        old_date = (datetime.now() - timedelta(days=120)).strftime("%Y-%m-%d")
        f.write_text(f"Meeting on {old_date} decided X.\n")
        stale = mh.find_stale()
        self.assertGreater(len(stale), 0)

    def test_fresh_content_ok(self):
        f = self.tmpdir / "fresh.md"
        today = datetime.now().strftime("%Y-%m-%d")
        f.write_text(f"Updated {today}: everything is fine.\n")
        stale = mh.find_stale()
        date_stale = [s for s in stale if "days old" in s.get("reason", "")]
        self.assertEqual(len(date_stale), 0)


class TestMemoryOrphans(unittest.TestCase):
    """Test orphan detection."""

    def setUp(self):
        self.tmpdir = Path(tempfile.mkdtemp())
        self.orig_dir = mh.MEMORY_DIR
        mh.MEMORY_DIR = self.tmpdir

    def tearDown(self):
        mh.MEMORY_DIR = self.orig_dir
        shutil.rmtree(self.tmpdir)

    def test_finds_orphaned_files(self):
        (self.tmpdir / "orphan.md").write_text("Nobody links to me")
        (self.tmpdir / "main.md").write_text("# Main\nSome text.")
        orph = mh.find_orphans()
        self.assertIn("orphan.md", orph["orphaned_files"])

    def test_finds_broken_links(self):
        (self.tmpdir / "a.md").write_text("[click](nonexistent.md)")
        orph = mh.find_orphans()
        self.assertGreater(len(orph["broken_links"]), 0)

    def test_finds_empty_files(self):
        (self.tmpdir / "empty.md").write_text("")
        orph = mh.find_orphans()
        self.assertIn("empty.md", orph["empty_files"])

    def test_permanent_files_not_orphaned(self):
        (self.tmpdir / "WORKING.md").write_text("Current work")
        orph = mh.find_orphans()
        self.assertNotIn("WORKING.md", orph["orphaned_files"])


class TestMemoryStats(unittest.TestCase):
    """Test stats generation."""

    def setUp(self):
        self.tmpdir = Path(tempfile.mkdtemp())
        self.orig_dir = mh.MEMORY_DIR
        mh.MEMORY_DIR = self.tmpdir

    def tearDown(self):
        mh.MEMORY_DIR = self.orig_dir
        shutil.rmtree(self.tmpdir)

    def test_stats_structure(self):
        (self.tmpdir / "a.md").write_text("Hello world")
        (self.tmpdir / "b.json").write_text("{}")
        stats = mh.gather_stats()
        self.assertEqual(stats["total_files"], 2)
        self.assertIn(".md", stats["files_by_extension"])
        self.assertIn(".json", stats["files_by_extension"])
        self.assertIsNotNone(stats["oldest"])
        self.assertIsNotNone(stats["newest"])

    def test_empty_dir(self):
        stats = mh.gather_stats()
        self.assertEqual(stats["total_files"], 0)


class TestMemoryArchive(unittest.TestCase):
    """Test archive functionality."""

    def setUp(self):
        self.tmpdir = Path(tempfile.mkdtemp())
        self.orig_dir = mh.MEMORY_DIR
        self.orig_archive = mh.ARCHIVE_DIR
        mh.MEMORY_DIR = self.tmpdir
        mh.ARCHIVE_DIR = self.tmpdir / "archive"

    def tearDown(self):
        mh.MEMORY_DIR = self.orig_dir
        mh.ARCHIVE_DIR = self.orig_archive
        shutil.rmtree(self.tmpdir)

    def test_dry_run_doesnt_move(self):
        (self.tmpdir / "2025-01-01.md").write_text("Old note")
        result = mh.archive_old_notes(older_than_days=30, execute=False)
        self.assertTrue(result["dry_run"])
        self.assertEqual(result["count"], 1)
        self.assertTrue((self.tmpdir / "2025-01-01.md").exists())

    def test_execute_moves_files(self):
        (self.tmpdir / "2025-01-01.md").write_text("Old note")
        result = mh.archive_old_notes(older_than_days=30, execute=True)
        self.assertFalse((self.tmpdir / "2025-01-01.md").exists())
        self.assertTrue((self.tmpdir / "archive" / "2025" / "2025-01-01.md").exists())

    def test_permanent_files_kept(self):
        (self.tmpdir / "WORKING.md").write_text("Keep me")
        result = mh.archive_old_notes(older_than_days=1, execute=True)
        self.assertTrue((self.tmpdir / "WORKING.md").exists())


if __name__ == "__main__":
    unittest.main()
diff --git a/tests/test_proactive.py b/tests/test_proactive.py
new file mode 100644
index 0000000..e425b13
--- /dev/null
+++ b/tests/test_proactive.py
@@ -0,0 +1,346 @@
#!/usr/bin/env python3
"""Tests for health_scanner.py and roadmap.py β€” 30+ tests."""

import json
import os
import sys
import tempfile
import unittest
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock

# Add parent to path
sys.path.insert(0, str(Path(__file__).parent))

from cortex.health_scanner import HealthScanner, Finding, _severity_rank, format_human, DEPRECATED_MODELS
import cortex.roadmap as roadmap


class TestSeverityRank(unittest.TestCase):
    def test_ordering(self):
        self.assertLess(_severity_rank("INFO"), _severity_rank("WARN"))
        self.assertLess(_severity_rank("WARN"), _severity_rank("CRITICAL"))

    def test_unknown(self):
        self.assertEqual(_severity_rank("UNKNOWN"), -1)


class TestFinding(unittest.TestCase):
    def test_to_dict(self):
        f = Finding("agents", "WARN", "test title", "detail")
        d = f.to_dict()
        self.assertEqual(d["section"], "agents")
        self.assertEqual(d["severity"], "WARN")
        self.assertEqual(d["title"], "test title")

    def test_to_dict_no_detail(self):
        f = Finding("deps", "INFO", "ok")
        self.assertEqual(f.to_dict()["detail"], "")


class TestHealthScanner(unittest.TestCase):
    def _make_scanner(self, config=None):
        # Build a scanner without touching the real config on disk.
        with patch.object(HealthScanner, '_load_config') as mock_load:
            scanner = HealthScanner()
            scanner.config = config or {"agents": {"list": [], "defaults": {}}}
            scanner.findings = []
            return scanner

    def test_no_agents(self):
        scanner = self._make_scanner({"agents": {"list": []}})
        scanner.check_agents()
        self.assertTrue(any("No agents" in f.title for f in scanner.findings))

    def test_missing_workspace(self):
        scanner = self._make_scanner({"agents": {"list": [
            {"id": "test", "workspace": "/nonexistent/path/xyz", "model": {"primary": "anthropic/test"}}
        ]}})
        scanner.check_agents()
        self.assertTrue(any("missing" in f.title.lower() for f in scanner.findings))

    def test_valid_workspace(self):
        with tempfile.TemporaryDirectory() as td:
            scanner = self._make_scanner({"agents": {"list": [
                {"id": "test", "workspace": td, "model": {"primary": "anthropic/test"}}
            ]}})
            scanner.check_agents()
            self.assertTrue(any("OK" in f.title for f in scanner.findings))

    @patch('cortex.health_scanner.urllib.request.urlopen')
    def test_ollama_check_reachable(self, mock_urlopen):
        mock_resp = MagicMock()
        mock_resp.read.return_value = json.dumps({"models": [{"name": "mymodel:latest"}]}).encode()
        mock_resp.__enter__ = lambda s: s
        mock_resp.__exit__ = MagicMock(return_value=False)
        mock_urlopen.return_value = mock_resp

        scanner = self._make_scanner()
        scanner._check_ollama_model("test", "ollama-desktop/mymodel:latest")
        self.assertTrue(any("available" in f.title for f in scanner.findings))

    @patch('cortex.health_scanner.urllib.request.urlopen')
    def test_ollama_check_unreachable(self, mock_urlopen):
        mock_urlopen.side_effect = Exception("connection refused")
        scanner = self._make_scanner()
        scanner._check_ollama_model("test", "ollama-desktop/model:latest")
        self.assertTrue(any("unreachable" in f.title.lower() for f in scanner.findings))

    def test_memory_check(self):
        # NOTE(review): unmocked β€” inspects the host's real memory state.
        scanner = self._make_scanner()
        scanner._check_memory()
        self.assertTrue(any("Memory" in f.title for f in scanner.findings))

    def test_disk_check(self):
        # NOTE(review): unmocked β€” inspects the host's real disk state.
        scanner = self._make_scanner()
        scanner._check_disk()
        self.assertTrue(any("Disk" in f.title for f in scanner.findings))

    @patch('cortex.health_scanner.subprocess.run')
    def test_nats_active(self, mock_run):
        mock_run.return_value = MagicMock(returncode=0, stdout="active")
        scanner = self._make_scanner()
        scanner._check_nats()
        self.assertTrue(any("NATS" in f.title and f.severity == "INFO" for f in scanner.findings))

    @patch('cortex.health_scanner.subprocess.run')
    def test_nats_inactive(self, mock_run):
        mock_run.return_value = MagicMock(returncode=3, stdout="inactive")
        scanner = self._make_scanner()
        scanner._check_nats()
        self.assertTrue(any("NATS" in f.title and f.severity == "CRITICAL" for f in scanner.findings))

    def test_deprecated_model_detection(self):
        scanner = self._make_scanner({"agents": {
            "list": [{"id": "test", "model": {"primary": "anthropic/claude-3-5-haiku-latest", "fallbacks": []}}],
            "defaults": {"model": {}, "models": {}, "heartbeat": {}}
        }})
        scanner.check_config()
self.assertTrue(any("EOL" in f.title for f in scanner.findings)) + + def test_run_all_sections(self): + scanner = self._make_scanner({"agents": {"list": [], "defaults": {"model": {}, "models": {}, "heartbeat": {}}}}) + report = scanner.run() + self.assertIn("timestamp", report) + self.assertIn("overall", report) + self.assertIn("findings", report) + + def test_run_single_section(self): + scanner = self._make_scanner({"agents": {"list": []}}) + report = scanner.run(["agents"]) + self.assertTrue(all(f["section"] == "agents" for f in report["findings"])) + + def test_format_human(self): + report = { + "timestamp": "2026-02-08T20:00:00", + "overall": "WARN", + "findings_count": {"INFO": 1, "WARN": 1, "CRITICAL": 0}, + "findings": [ + {"section": "agents", "severity": "WARN", "title": "test warn", "detail": ""}, + {"section": "deps", "severity": "INFO", "title": "test info", "detail": ""}, + ] + } + output = format_human(report) + self.assertIn("WARN", output) + self.assertIn("test warn", output) + + +class TestRoadmap(unittest.TestCase): + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + self.roadmap_file = Path(self.tmpdir) / "roadmap.json" + self.seed_data = { + "items": [ + { + "id": "item-001", + "title": "Test task 1", + "status": "open", + "priority": "P0", + "deadline": (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d"), + "depends_on": [], + "tags": ["infra"], + "created": "2026-01-01T00:00:00", + "updated": "2026-01-01T00:00:00", + "notes": "" + }, + { + "id": "item-002", + "title": "Test task 2", + "status": "in_progress", + "priority": "P1", + "deadline": (datetime.now() + timedelta(days=3)).strftime("%Y-%m-%d"), + "depends_on": ["item-001"], + "tags": ["business"], + "created": "2026-01-01T00:00:00", + "updated": "2026-01-01T00:00:00", + "notes": "" + }, + { + "id": "item-003", + "title": "Done task", + "status": "done", + "priority": "P2", + "deadline": "2026-01-01", + "depends_on": [], + "tags": ["infra"], + "created": "2026-01-01T00:00:00", + 
"updated": "2026-01-01T00:00:00", + "notes": "" + }, + { + "id": "item-004", + "title": "Stale task", + "status": "in_progress", + "priority": "P2", + "deadline": null if False else None, + "depends_on": ["item-999"], + "tags": [], + "created": "2026-01-01T00:00:00", + "updated": (datetime.now() - timedelta(days=10)).strftime("%Y-%m-%dT%H:%M:%S"), + "notes": "" + } + ] + } + self.roadmap_file.write_text(json.dumps(self.seed_data)) + + def tearDown(self): + import shutil + shutil.rmtree(self.tmpdir) + + def test_load(self): + data = roadmap.load_roadmap(self.roadmap_file) + self.assertEqual(len(data["items"]), 4) + + def test_load_nonexistent(self): + data = roadmap.load_roadmap(Path(self.tmpdir) / "nope.json") + self.assertEqual(data, {"items": []}) + + def test_save_and_reload(self): + data = roadmap.load_roadmap(self.roadmap_file) + data["items"].append({"id": "new", "title": "new"}) + roadmap.save_roadmap(data, self.roadmap_file) + reloaded = roadmap.load_roadmap(self.roadmap_file) + self.assertEqual(len(reloaded["items"]), 5) + + def test_find_item_full_id(self): + data = roadmap.load_roadmap(self.roadmap_file) + item = roadmap.find_item(data, "item-001") + self.assertIsNotNone(item) + self.assertEqual(item["title"], "Test task 1") + + def test_find_item_prefix(self): + data = roadmap.load_roadmap(self.roadmap_file) + item = roadmap.find_item(data, "item-00") + self.assertIsNotNone(item) + + def test_find_item_missing(self): + data = roadmap.load_roadmap(self.roadmap_file) + self.assertIsNone(roadmap.find_item(data, "nonexistent")) + + def test_parse_date_iso(self): + d = roadmap.parse_date("2026-02-08") + self.assertEqual(d.year, 2026) + self.assertEqual(d.month, 2) + + def test_parse_date_with_time(self): + d = roadmap.parse_date("2026-02-08 09:00") + self.assertEqual(d.hour, 9) + + def test_parse_date_none(self): + self.assertIsNone(roadmap.parse_date(None)) + self.assertIsNone(roadmap.parse_date("not-a-date")) + + def test_overdue_detection(self, ): + 
"""Item-001 has deadline in the past β†’ overdue.""" + data = roadmap.load_roadmap(self.roadmap_file) + now = datetime.now() + overdue = [i for i in data["items"] if i["status"] != "done" + and roadmap.parse_date(i.get("deadline")) + and roadmap.parse_date(i["deadline"]) < now] + self.assertEqual(len(overdue), 1) + self.assertEqual(overdue[0]["id"], "item-001") + + def test_upcoming_detection(self): + data = roadmap.load_roadmap(self.roadmap_file) + now = datetime.now() + cutoff = now + timedelta(days=7) + upcoming = [i for i in data["items"] if i["status"] != "done" + and roadmap.parse_date(i.get("deadline")) + and now <= roadmap.parse_date(i["deadline"]) <= cutoff] + self.assertEqual(len(upcoming), 1) + self.assertEqual(upcoming[0]["id"], "item-002") + + def test_dep_check_missing(self): + data = roadmap.load_roadmap(self.roadmap_file) + id_map = {i["id"]: i for i in data["items"]} + # item-004 depends on item-999 which doesn't exist + item = id_map["item-004"] + for dep_id in item["depends_on"]: + self.assertNotIn(dep_id, id_map) + + def test_dep_check_incomplete(self): + data = roadmap.load_roadmap(self.roadmap_file) + id_map = {i["id"]: i for i in data["items"]} + # item-002 depends on item-001 which is not done + item = id_map["item-002"] + for dep_id in item["depends_on"]: + dep = id_map.get(dep_id) + self.assertIsNotNone(dep) + self.assertNotEqual(dep["status"], "done") + + def test_stale_detection(self): + data = roadmap.load_roadmap(self.roadmap_file) + now = datetime.now() + stale = [i for i in data["items"] + if i["status"] == "in_progress" + and roadmap.parse_date(i.get("updated")) + and (now - roadmap.parse_date(i["updated"])).days > 7] + self.assertTrue(len(stale) >= 1) + stale_ids = [s["id"] for s in stale] + self.assertIn("item-004", stale_ids) + + def test_valid_statuses(self): + for s in ["open", "in_progress", "blocked", "done"]: + self.assertIn(s, roadmap.VALID_STATUSES) + + def test_valid_priorities(self): + for p in ["P0", "P1", "P2", "P3"]: 
+ self.assertIn(p, roadmap.VALID_PRIORITIES) + + def test_now_iso(self): + ts = roadmap.now_iso() + # Should be parseable + dt = datetime.fromisoformat(ts) + self.assertIsInstance(dt, datetime) + + +class TestReportGeneration(unittest.TestCase): + def test_report_contains_sections(self): + """Capture report output and verify structure.""" + import io + from contextlib import redirect_stdout + + data = { + "items": [ + {"id": "r1", "title": "Overdue thing", "status": "open", "priority": "P0", + "deadline": "2020-01-01", "depends_on": [], "tags": [], "created": "2020-01-01T00:00:00", + "updated": "2020-01-01T00:00:00", "notes": ""}, + {"id": "r2", "title": "Active thing", "status": "in_progress", "priority": "P1", + "deadline": (datetime.now() + timedelta(days=2)).strftime("%Y-%m-%d"), + "depends_on": [], "tags": [], "created": "2020-01-01T00:00:00", + "updated": "2020-01-01T00:00:00", "notes": ""}, + ] + } + + f = io.StringIO() + args = type('Args', (), {})() + with redirect_stdout(f): + roadmap.cmd_report(args, data) + output = f.getvalue() + self.assertIn("Roadmap Status Report", output) + self.assertIn("Overdue", output) + self.assertIn("Overdue thing", output) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_selfheal.py b/tests/test_selfheal.py new file mode 100644 index 0000000..db173b0 --- /dev/null +++ b/tests/test_selfheal.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 +"""Tests for feedback_loop.py and validate_output.py β€” 30+ tests.""" + +import json +import os +import sys +import tempfile +import unittest +from pathlib import Path +from datetime import datetime, timedelta, timezone + +sys.path.insert(0, str(Path(__file__).parent)) + +from cortex.feedback_loop import ( + parse_since, parse_jsonl, get_text_content, get_tool_name, + detect_corrections, detect_retries, detect_tool_failures, + detect_self_errors, detect_knowledge_gaps, analyze_session, + finding_to_markdown, deduplicate_findings, load_config, + get_session_timestamp, 
Finding, append_to_growth_log, +) +from cortex.validate_output import ( + extract_mentioned_files, extract_created_files_from_transcript, + extract_transcript_errors, check_test_results, validate_file_exists, + validate_python_syntax, validate_typescript_structure, run_validation, + ValidationReport, parse_jsonl as v_parse_jsonl, +) + +DEFAULT_CONFIG = load_config() + + +def make_entry(role="user", text="hello", entry_type="message", **kwargs): + """Helper to create a transcript entry.""" + e = {"type": entry_type, "message": {"role": role, "content": [{"type": "text", "text": text}]}} + e.update(kwargs) + return e + + +def make_tool_call(name="exec", args=None): + return {"type": "message", "message": {"role": "assistant", "content": [ + {"type": "toolCall", "name": name, "arguments": args or {}} + ]}} + + +def make_tool_result(text="ok", is_error=False): + return {"type": "message", "message": {"role": "toolResult", "content": [ + {"type": "text", "text": text} + ], "isError": is_error}} + + +def write_jsonl(path, entries): + with open(path, "w") as f: + for e in entries: + f.write(json.dumps(e) + "\n") + + +# === Feedback Loop Tests === + +class TestParseSince(unittest.TestCase): + def test_hours(self): + result = parse_since("24h") + self.assertAlmostEqual( + (datetime.now(timezone.utc) - result).total_seconds(), 86400, delta=5) + + def test_days(self): + result = parse_since("7d") + self.assertAlmostEqual( + (datetime.now(timezone.utc) - result).total_seconds(), 604800, delta=5) + + def test_minutes(self): + result = parse_since("30m") + self.assertAlmostEqual( + (datetime.now(timezone.utc) - result).total_seconds(), 1800, delta=5) + + def test_invalid(self): + with self.assertRaises(ValueError): + parse_since("abc") + + +class TestGetTextContent(unittest.TestCase): + def test_list_content(self): + entry = make_entry(text="hello world") + self.assertEqual(get_text_content(entry), "hello world") + + def test_string_content(self): + entry = {"message": 
{"content": "plain string"}} + self.assertEqual(get_text_content(entry), "plain string") + + def test_empty(self): + entry = {"message": {}} + self.assertEqual(get_text_content(entry), "") + + +class TestGetToolName(unittest.TestCase): + def test_tool_call(self): + entry = make_tool_call("read") + self.assertEqual(get_tool_name(entry), "read") + + def test_no_tool(self): + entry = make_entry() + self.assertIsNone(get_tool_name(entry)) + + +class TestDetectCorrections(unittest.TestCase): + def test_detects_nein(self): + entries = [ + make_entry("assistant", "Here is the result"), + make_entry("user", "nein, das ist falsch"), + ] + findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertTrue(len(findings) >= 1) + self.assertEqual(findings[0].category, "correction") + + def test_detects_wrong(self): + entries = [ + make_entry("assistant", "Done!"), + make_entry("user", "That's wrong"), + ] + findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertTrue(len(findings) >= 1) + + def test_no_false_positive(self): + entries = [ + make_entry("assistant", "Here you go"), + make_entry("user", "Thanks, great job!"), + ] + findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 0) + + def test_requires_assistant_before(self): + entries = [ + make_entry("user", "nein"), + ] + findings = detect_corrections(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 0) + + +class TestDetectRetries(unittest.TestCase): + def test_detects_3_retries(self): + entries = [make_tool_call("exec") for _ in range(4)] + findings = detect_retries(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 1) + self.assertIn("4 times", findings[0].trigger) + + def test_no_retry_under_threshold(self): + entries = [make_tool_call("exec") for _ in range(2)] + findings = detect_retries(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 0) + + def 
test_different_tools_no_retry(self): + entries = [make_tool_call("exec"), make_tool_call("read"), make_tool_call("write")] + findings = detect_retries(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 0) + + +class TestDetectToolFailures(unittest.TestCase): + def test_detects_is_error(self): + entries = [make_tool_result("something failed", is_error=True)] + findings = detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertTrue(len(findings) >= 1) + + def test_detects_error_text(self): + entries = [make_tool_result("Error: command not found")] + # toolResult role needed + entries[0]["message"]["role"] = "toolResult" + findings = detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertTrue(len(findings) >= 1) + + def test_no_error(self): + entries = [make_tool_result("All good")] + entries[0]["message"]["role"] = "toolResult" + findings = detect_tool_failures(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 0) + + +class TestDetectSelfErrors(unittest.TestCase): + def test_detects_sorry(self): + entries = [make_entry("assistant", "Sorry, I made a mistake there")] + findings = detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 1) + + def test_detects_entschuldigung(self): + entries = [make_entry("assistant", "Entschuldigung, das war falsch")] + findings = detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 1) + + def test_no_apology(self): + entries = [make_entry("assistant", "Here is your answer")] + findings = detect_self_errors(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 0) + + +class TestDetectKnowledgeGaps(unittest.TestCase): + def test_detects_gap(self): + entries = [ + make_entry("assistant", "I couldn't find that information"), + make_entry("user", "The answer is actually in the config file at /etc/app/config.yaml"), + ] + findings = detect_knowledge_gaps(entries, 
DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 1) + + def test_no_gap_short_reply(self): + entries = [ + make_entry("assistant", "I couldn't find it"), + make_entry("user", "ok"), + ] + findings = detect_knowledge_gaps(entries, DEFAULT_CONFIG, "test.jsonl") + self.assertEqual(len(findings), 0) + + +class TestFindingToMarkdown(unittest.TestCase): + def test_format(self): + f = Finding("correction", "User said 'nein'", (10, 12), "test.jsonl") + md = finding_to_markdown(f, DEFAULT_CONFIG) + self.assertIn("Auto-detected: User Correction", md) + self.assertIn("User said 'nein'", md) + self.assertIn("lines 10-12", md) + + +class TestDeduplicate(unittest.TestCase): + def test_removes_dupes(self): + f1 = Finding("correction", "User said nein after agent response", (1, 2), "a.jsonl") + f2 = Finding("correction", "User said nein after agent response", (5, 6), "a.jsonl") + result = deduplicate_findings([f1, f2]) + self.assertEqual(len(result), 1) + + +class TestParseJsonl(unittest.TestCase): + def test_parses_valid(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write('{"a":1}\n{"b":2}\n') + f.flush() + result = parse_jsonl(Path(f.name)) + self.assertEqual(len(result), 2) + os.unlink(f.name) + + def test_skips_bad_lines(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write('{"a":1}\nBAD LINE\n{"b":2}\n') + f.flush() + result = parse_jsonl(Path(f.name)) + self.assertEqual(len(result), 2) + os.unlink(f.name) + + +# === Validator Tests === + +class TestExtractMentionedFiles(unittest.TestCase): + def test_backtick_files(self): + task = "Create `feedback_loop.py` and `config.json`" + files = extract_mentioned_files(task) + self.assertIn("feedback_loop.py", files) + self.assertIn("config.json", files) + + def test_bare_files(self): + task = "Write test_selfheal.py with 25 tests" + files = extract_mentioned_files(task) + self.assertIn("test_selfheal.py", files) + + +class 
TestExtractCreatedFiles(unittest.TestCase): + def test_write_calls(self): + entries = [{"type": "message", "message": {"role": "assistant", "content": [ + {"type": "toolCall", "name": "Write", "arguments": {"path": "/tmp/test.py"}} + ]}}] + files = extract_created_files_from_transcript(entries) + self.assertIn("/tmp/test.py", files) + + +class TestValidateFileExists(unittest.TestCase): + def test_existing_file(self): + with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".py") as f: + f.write("print('hi')") + f.flush() + ok, detail = validate_file_exists(f.name) + self.assertTrue(ok) + os.unlink(f.name) + + def test_missing_file(self): + ok, _ = validate_file_exists("/tmp/nonexistent_xyz_abc.py") + self.assertFalse(ok) + + def test_empty_file(self): + with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: + f.flush() + ok, detail = validate_file_exists(f.name) + self.assertFalse(ok) + self.assertIn("empty", detail) + os.unlink(f.name) + + +class TestValidatePythonSyntax(unittest.TestCase): + def test_valid_python(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def hello():\n return 42\n") + f.flush() + ok, _ = validate_python_syntax(f.name) + self.assertTrue(ok) + os.unlink(f.name) + + def test_invalid_python(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def hello(\n") + f.flush() + ok, detail = validate_python_syntax(f.name) + self.assertFalse(ok) + self.assertIn("Syntax error", detail) + os.unlink(f.name) + + +class TestValidateTypescript(unittest.TestCase): + def test_valid_ts(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f: + f.write("export function hello(): string { return 'hi'; }\n") + f.flush() + ok, _ = validate_typescript_structure(f.name) + self.assertTrue(ok) + os.unlink(f.name) + + def test_excessive_any(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f: + 
f.write("export " + "const x: any = 1;\n" * 15) + f.flush() + ok, detail = validate_typescript_structure(f.name) + self.assertFalse(ok) + self.assertIn("any", detail) + os.unlink(f.name) + + +class TestCheckTestResults(unittest.TestCase): + def test_pytest_pass(self): + entries = [make_tool_result("10 passed in 0.5s")] + entries[0]["message"]["role"] = "toolResult" + result = check_test_results(entries) + self.assertTrue(result["tests_found"]) + self.assertTrue(result["tests_passed"]) + + def test_pytest_fail(self): + entries = [make_tool_result("3 failed, 7 passed")] + entries[0]["message"]["role"] = "toolResult" + result = check_test_results(entries) + self.assertTrue(result["tests_found"]) + self.assertFalse(result["tests_passed"]) + + +class TestValidationReport(unittest.TestCase): + def test_report_pass(self): + r = ValidationReport() + r.add("check1", "pass") + self.assertTrue(r.ok) + self.assertEqual(r.passed, 1) + + def test_report_fail(self): + r = ValidationReport() + r.add("check1", "fail", "broken") + self.assertFalse(r.ok) + + def test_to_json(self): + r = ValidationReport() + r.add("check1", "pass") + j = r.to_json() + self.assertIn("checks", j) + self.assertTrue(j["ok"]) + + +class TestIntegration(unittest.TestCase): + """Integration test with a mock session.""" + + def test_full_feedback_loop(self): + entries = [ + {"type": "session", "id": "test-123", "timestamp": "2026-02-08T10:00:00Z"}, + make_entry("assistant", "Here is the file"), + make_entry("user", "nein, das ist falsch"), + make_tool_call("exec"), + make_tool_call("exec"), + make_tool_call("exec"), + make_tool_call("exec"), + make_entry("assistant", "Sorry, I made an error"), + make_tool_result("Error: permission denied", is_error=True), + ] + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + write_jsonl(f.name, entries) + findings = analyze_session(parse_jsonl(Path(f.name)), DEFAULT_CONFIG, f.name) + os.unlink(f.name) + categories = {f.category for f in 
findings} + self.assertIn("correction", categories) + self.assertIn("retry", categories) + self.assertIn("self_error", categories) + self.assertIn("tool_failure", categories) + + def test_full_validation(self): + # Create a real file to validate + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as pyf: + pyf.write("print('hello')\n") + pyf.flush() + py_path = pyf.name + + entries = [ + {"type": "session", "id": "val-test", "timestamp": "2026-02-08T10:00:00Z"}, + {"type": "message", "message": {"role": "assistant", "content": [ + {"type": "toolCall", "name": "Write", "arguments": {"path": py_path}} + ]}}, + make_tool_result("25 passed in 1.2s"), + ] + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + write_jsonl(f.name, entries) + report = run_validation(Path(f.name), f"Create {py_path} with tests") + os.unlink(f.name) + os.unlink(py_path) + self.assertIsInstance(report, ValidationReport) + self.assertTrue(report.passed > 0) + + +class TestAppendToGrowthLog(unittest.TestCase): + def test_dry_run(self): + f = Finding("correction", "test trigger", (1, 2), "test.jsonl") + text = append_to_growth_log([f], DEFAULT_CONFIG, dry_run=True) + self.assertIn("Auto-detected", text) + + def test_empty_findings(self): + text = append_to_growth_log([], DEFAULT_CONFIG, dry_run=True) + self.assertEqual(text, "") + + +if __name__ == "__main__": + unittest.main()