Merge darkplex-core into cortex — unified intelligence layer v0.2.0
Some checks failed
Tests / test (push) Failing after 2s

- Merged all unique darkplex-core modules into cortex:
  - intelligence/ subfolder (anticipator, collective, shared_memory, knowledge_cleanup, temporal, llm_extractor, loop)
  - governance/ subfolder (policy engine, risk scorer, evidence, enforcer, report generator)
  - entity_manager.py, knowledge_extractor.py
- Fixed bare 'from intelligence.' imports to 'from cortex.intelligence.'
- Added 'darkplex' CLI alias alongside 'cortex'
- Package renamed to darkplex-core v0.2.0
- 405 tests passing (was 234)
- 14 new test files covering all merged modules
This commit is contained in:
Claudia 2026-02-12 08:43:02 +01:00
parent fda607c204
commit fd7d75c0ed
41 changed files with 6368 additions and 3 deletions

1
.gitignore vendored
View file

@ -5,3 +5,4 @@ dist/
build/ build/
.eggs/ .eggs/
.pytest_cache/ .pytest_cache/
.coverage

371
cortex/entity_manager.py Executable file
View file

@ -0,0 +1,371 @@
#!/usr/bin/env python3
"""
Entity Manager File-based knowledge graph for entity extraction and relationship mapping.
Part of Level 4.4 AGI Roadmap.
Usage:
entity-manager.py bootstrap Bootstrap from life/areas/
entity-manager.py extract "text" Extract entities from text
entity-manager.py relate "A" "B" [type] Create/update relationship
entity-manager.py query "entity" Query relationships for entity
entity-manager.py graph Output relationship summary
"""
import sys
import os
import json
import re
import time
from pathlib import Path
# File-based knowledge graph storage locations.
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
# Root of the curated life areas (people/, companies/, projects/ subfolders).
LIFE_AREAS = Path.home() / "life" / "areas"

# Common words to skip during entity extraction.
# FIX: removed literals that appeared twice ("then", "sure", "no", "ok",
# "may", "right") — harmless in a set, but noise for maintainers.
STOP_WORDS = {
    # articles, copulas, auxiliaries, pronouns, determiners
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "shall", "can", "need", "must", "i", "you",
    "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
    "my", "your", "his", "its", "our", "their", "this", "that", "these",
    "those", "what", "which", "who", "whom", "where", "when", "why", "how",
    "all", "each", "every", "both", "few", "more", "most", "other", "some",
    "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too",
    "very", "just", "because", "as", "until", "while", "of", "at", "by",
    "for", "with", "about", "against", "between", "through", "during",
    "before", "after", "above", "below", "to", "from", "up", "down", "in",
    "out", "on", "off", "over", "under", "again", "further", "then", "once",
    "here", "there", "and", "but", "or", "if", "else", "also",
    # infrastructure / logging vocabulary that shows up in agent transcripts
    "system", "cron", "heartbeat", "ok", "error", "warning", "info",
    "message", "session", "agent", "main", "matrix", "telegram",
    # frequent verbs and adjectives
    "read", "write", "check", "run", "send", "get", "set", "let", "see",
    "know", "think", "want", "like", "make", "take", "come", "go", "say",
    "tell", "ask", "try", "use", "find", "give", "new", "good", "first",
    "last", "long", "great", "little", "right", "big", "high", "old",
    "different", "small", "large", "next", "early", "young", "important",
    "public", "bad", "sure", "yes", "maybe", "okay",
    # conversational filler
    "thanks", "thank", "please", "hello", "hi", "hey", "bye", "well",
    # relative days, weekdays, months
    "now", "today", "tomorrow", "yesterday", "monday", "tuesday",
    "wednesday", "thursday", "friday", "saturday", "sunday",
    "january", "february", "march", "april", "june", "july",
    "august", "september", "october", "november", "december",
    # adverbs and misc high-frequency words
    "still", "already", "currently", "actually", "really",
    "look", "keep", "going", "based", "done", "work", "working",
}
def normalize(name):
    """Return the canonical entity key: trimmed, lower-cased, underscores as dashes."""
    cleaned = name.strip().lower()
    return cleaned.replace("_", "-")
def load_json(path):
    """Load JSON file, return empty dict if missing/invalid."""
    try:
        with open(path) as fh:
            data = json.load(fh)
    except (FileNotFoundError, json.JSONDecodeError):
        # Missing or corrupt file degrades to an empty registry.
        return {}
    return data
def save_json(path, data):
    """Write *data* as pretty-printed JSON to *path*, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    with open(path, "w") as fh:
        fh.write(serialized)
def load_known_entities():
    """Load known entity names from life/areas/ and entities.json.

    Returns a dict mapping normalized entity name -> {"type": ..., "source": ...}.
    Filesystem entries under life/areas/ take precedence over entities.json.
    """
    known = {}
    # Explicit singular type per category folder.
    # BUG FIX: the old code derived the type with category.rstrip("s"), which
    # strips *characters*, not a suffix — "companies" became "companie".
    category_types = {"people": "person", "companies": "company", "projects": "project"}
    for category, etype in category_types.items():
        area_dir = LIFE_AREAS / category
        if not area_dir.exists():
            continue
        for entry in area_dir.iterdir():
            if entry.is_dir():
                name = normalize(entry.name)
                known[name] = {"type": etype, "source": f"life/areas/{category}"}
    # entities.json fills in anything not already present from the filesystem.
    entities = load_json(ENTITIES_FILE)
    for name, info in entities.items():
        if name not in known:
            known[name] = info
    return known
def extract_entities(text, known=None):
    """Extract entities from *text* via known-entity matching plus heuristics.

    Returns a dict mapping normalized name -> {"type": ..., "match": <strategy>}.
    Earlier strategies win: a name claimed by one pass is skipped by later ones.
    """
    if known is None:
        known = load_known_entities()
    results = {}
    lowered = text.lower()

    # 1. Known entities: try the slug, a spaced variant, and a joined variant.
    for name, info in known.items():
        for variant in (name, name.replace("-", " "), name.replace("-", "")):
            if variant in lowered and len(variant) > 2:
                results[name] = {"type": info.get("type", "unknown"), "match": "known"}
                break

    # 2. @mentions are assumed to be people.
    for match in re.finditer(r"@(\w+)", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS and len(candidate) > 2:
            results[candidate] = {"type": "person", "match": "mention"}

    # 3. Capitalized multi-word runs (likely proper nouns; umlauts supported).
    for match in re.finditer(r"\b([A-Z][a-zäöüß]+(?:\s+[A-Z][a-zäöüß]+)+)\b", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS and len(candidate) > 3:
            # Heuristic: 2-3 words reads like a person's name; more like a topic.
            etype = "person" if len(candidate.split()) <= 3 else "topic"
            results[candidate] = {"type": etype, "match": "capitalized"}

    # 4. Standalone capitalized words — potential entities of unknown type.
    for match in re.finditer(r"\b([A-Z][a-zäöüß]{2,})\b", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS:
            results[candidate] = {"type": "unknown", "match": "capitalized_single"}

    # 5. ALL-CAPS acronyms (likely companies/products), minus tech jargon.
    tech_noise = {
        "ok", "am", "pm", "gmt", "utc", "url", "api", "cli", "ssh", "dns",
        "http", "https", "json", "html", "css", "js", "ts", "py", "md",
        "id", "ui", "ux", "io", "os", "ip", "gb", "mb", "kb", "tb",
    }
    for match in re.finditer(r"\b([A-Z]{2,6})\b", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS and candidate not in tech_noise:
            results[candidate] = {"type": "organization", "match": "acronym"}
    return results
def cmd_bootstrap():
    """Bootstrap entities from life/areas/.

    Scans life/areas/people and life/areas/companies, registering one entity
    per directory (names already in entities.json are left untouched). Email
    and context are pulled from each entry's summary.md when present. Both
    registry files are written back at the end.
    """
    entities = load_json(ENTITIES_FILE)
    relationships = load_json(RELATIONSHIPS_FILE)
    count = 0
    for category in ["people", "companies"]:
        area_dir = LIFE_AREAS / category
        if not area_dir.exists():
            continue
        etype = "person" if category == "people" else "company"
        for entry in sorted(area_dir.iterdir()):
            if not entry.is_dir():
                continue
            name = normalize(entry.name)
            if name in entities:
                # Never overwrite an existing registry entry.
                continue
            info = {"type": etype, "source": f"life/areas/{category}", "bootstrapped": True}
            # Try to extract extra info from summary.md (first 2000 chars only).
            summary_path = entry / "summary.md"
            if summary_path.exists():
                try:
                    summary = summary_path.read_text(errors="replace")[:2000]
                    # Email from a "**Email:** ..." markdown line.
                    em = re.search(r"\*\*Email:\*\*\s*(\S+)", summary)
                    if em:
                        info["email"] = em.group(1)
                    # Context from a "**Kontext:** ..." (German) markdown line.
                    ctx = re.search(r"\*\*Kontext:\*\*\s*(.+)", summary)
                    if ctx:
                        info["context"] = ctx.group(1).strip()
                except Exception:
                    # Enrichment is best-effort: a broken summary.md must not
                    # abort the whole bootstrap scan.
                    pass
            entities[name] = info
            count += 1
    save_json(ENTITIES_FILE, entities)
    # Relationships are written back unchanged so the file exists after bootstrap.
    save_json(RELATIONSHIPS_FILE, relationships)
    print(f"Bootstrapped {count} new entities. Total: {len(entities)}")
def cmd_extract(text):
    """Extract entities from *text*, print them, and register new discoveries."""
    known = load_known_entities()
    found = extract_entities(text, known)
    if not found:
        print("No entities found.")
        return
    # Persist any names not yet in the registry.
    registry = load_json(ENTITIES_FILE)
    added = 0
    for name, info in found.items():
        if name not in registry:
            registry[name] = {
                "type": info["type"],
                "source": "extraction",
                "first_seen": time.strftime("%Y-%m-%dT%H:%M:%S"),
            }
            added += 1
        print(f" [{info['type']:12s}] {name} ({info['match']})")
    if added:
        save_json(ENTITIES_FILE, registry)
        print(f"\n{added} new entities added to registry.")
def cmd_relate(entity_a, entity_b, rel_type="related"):
    """Create or update a relationship between two entities."""
    a, b = normalize(entity_a), normalize(entity_b)
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)
    # The key is order-independent: lexicographically smaller name first.
    key = f"{min(a,b)}::{max(a,b)}"
    ts = time.strftime("%Y-%m-%dT%H:%M:%S")
    existing = relationships.get(key)
    if existing is not None:
        existing["count"] = existing.get("count", 1) + 1
        existing["last_seen"] = ts
        # Only non-default relation types are recorded, once each.
        if rel_type != "related" and rel_type not in existing.get("types", []):
            existing.setdefault("types", []).append(rel_type)
        print(f"Updated: {a} <-> {b} (seen {existing['count']}x)")
    else:
        relationships[key] = {
            "a": a, "b": b,
            "types": [rel_type],
            "count": 1,
            "first_seen": ts,
            "last_seen": ts,
        }
        print(f"Created: {a} <-> {b} ({rel_type})")
    # Both endpoints must exist in the entity registry.
    for name in (a, b):
        if name not in entities:
            entities[name] = {"type": "unknown", "source": "relationship", "first_seen": ts}
    save_json(RELATIONSHIPS_FILE, relationships)
    save_json(ENTITIES_FILE, entities)
def cmd_query(entity_name):
    """Query all relationships for an entity.

    Prints, in order: the registry entry (if any), all relationships sorted by
    observation count, and an excerpt of any matching life/areas/ summary.
    """
    name = normalize(entity_name)
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)
    # Entity info from the registry.
    if name in entities:
        info = entities[name]
        print(f"Entity: {name}")
        print(f" Type: {info.get('type', 'unknown')}")
        if info.get("email"):
            print(f" Email: {info['email']}")
        if info.get("context"):
            print(f" Context: {info['context']}")
        if info.get("source"):
            print(f" Source: {info['source']}")
    else:
        print(f"Entity '{name}' not found in registry.")
    # Relationships touching this entity (as either endpoint).
    rels = []
    for key, rel in relationships.items():
        if rel["a"] == name or rel["b"] == name:
            other = rel["b"] if rel["a"] == name else rel["a"]
            rels.append((other, rel))
    if rels:
        print(f"\nRelationships ({len(rels)}):")
        # Most frequently co-observed first.
        for other, rel in sorted(rels, key=lambda x: -x[1].get("count", 1)):
            types = ", ".join(rel.get("types", ["related"]))
            print(f" {name} <-> {other} [{types}] (seen {rel.get('count', 1)}x)")
    else:
        print("\nNo relationships found.")
    # Check life/areas/ for a matching folder (spaces map to dashes on disk).
    for category in ["people", "companies", "projects"]:
        area_path = LIFE_AREAS / category / name.replace(" ", "-")
        if area_path.exists():
            summary_path = area_path / "summary.md"
            if summary_path.exists():
                print(f"\nLife area ({category}): {area_path}")
                # Only the first 500 chars, to keep the output readable.
                content = summary_path.read_text(errors="replace")[:500]
                print(content)
def cmd_graph():
    """Output a simple relationship graph summary."""
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)
    if not relationships:
        print("No relationships in knowledge graph.")
        return
    # Degree per entity: each relationship contributes to both endpoints.
    degree = {}
    for rel in relationships.values():
        for endpoint in (rel["a"], rel["b"]):
            degree[endpoint] = degree.get(endpoint, 0) + 1
    print(f"Knowledge Graph: {len(entities)} entities, {len(relationships)} relationships\n")
    print("Top connected entities:")
    ranked = sorted(degree.items(), key=lambda item: -item[1])
    for name, count in ranked[:20]:
        etype = entities.get(name, {}).get("type", "?")
        print(f" {name} ({etype}): {count} connections")
    print(f"\nRecent relationships:")
    by_recency = sorted(relationships.values(), key=lambda r: r.get("last_seen", ""), reverse=True)
    for rel in by_recency[:10]:
        types = ", ".join(rel.get("types", ["related"]))
        print(f" {rel['a']} <-> {rel['b']} [{types}]")
def main():
    """CLI entry point: dispatch on the first positional argument."""
    argv = sys.argv
    if len(argv) < 2:
        print(__doc__)
        sys.exit(1)
    command = argv[1]
    if command == "bootstrap":
        cmd_bootstrap()
        return
    if command == "extract":
        if len(argv) < 3:
            print("Usage: entity-manager.py extract \"text\"")
            sys.exit(1)
        cmd_extract(" ".join(argv[2:]))
        return
    if command == "relate":
        if len(argv) < 4:
            print("Usage: entity-manager.py relate \"entity_a\" \"entity_b\" [type]")
            sys.exit(1)
        rel_type = argv[4] if len(argv) > 4 else "related"
        cmd_relate(argv[2], argv[3], rel_type)
        return
    if command == "query":
        if len(argv) < 3:
            print("Usage: entity-manager.py query \"entity\"")
            sys.exit(1)
        cmd_query(" ".join(argv[2:]))
        return
    if command == "graph":
        cmd_graph()
        return
    # Fall through: unknown command.
    print(f"Unknown command: {command}")
    print(__doc__)
    sys.exit(1)


if __name__ == "__main__":
    main()

View file

228
cortex/governance/cli.py Normal file
View file

@ -0,0 +1,228 @@
"""Governance CLI — policy evaluation, risk scoring, evidence & reporting.
Usage:
darkplex governance evaluate --agent <name> --action <action> [--data-type <type>] [--target <target>] [--role <role>]
darkplex governance risk --agent <name> --action <action> [--data-type <type>] [--target <target>] [--role <role>]
darkplex governance evidence [--agent <name>] [--verdict <verdict>] [--control <id>] [--json]
darkplex governance report [--agent <name>] [--json] [--output <path>]
darkplex governance policies [--reload]
darkplex governance status
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
# Default paths — overridable via environment for testing and deployment.
_PACKAGE_DIR = Path(__file__).parent
DEFAULT_POLICIES_DIR = os.environ.get(
    "GOVERNANCE_POLICIES_DIR",
    str(_PACKAGE_DIR / "policies"),
)
DEFAULT_CONTROLS_MAPPING = os.environ.get(
    "GOVERNANCE_CONTROLS_MAPPING",
    str(_PACKAGE_DIR / "controls" / "iso27001-mapping.yaml"),
)
def _build_context(args: argparse.Namespace) -> dict:
"""Build an evaluation context from CLI args."""
ctx = {}
if args.agent:
ctx["agent"] = args.agent
if args.action:
ctx["action"] = args.action
if args.data_type:
ctx["data_type"] = args.data_type
if args.target:
ctx["target"] = args.target
if args.role:
ctx["agent_role"] = args.role
return ctx
def _get_engine():
    """Lazily construct a PolicyEngine using the default policies dir.

    Import is package-qualified: after the darkplex-core merge these modules
    live under cortex/governance/, so the bare 'from governance.' import no
    longer resolves (mirrors the 'from cortex.intelligence.' fix).
    """
    from cortex.governance.policy import PolicyEngine

    return PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR)
def _get_scorer():
    """Lazily construct a RiskScorer.

    Package-qualified import (cortex.governance) — see _get_engine.
    """
    from cortex.governance.risk_scorer import RiskScorer

    return RiskScorer()
def _get_enforcer():
    """Wire up a full Enforcer (policy engine + risk scorer + evidence).

    Package-qualified imports (cortex.governance) — see _get_engine.
    """
    from cortex.governance.enforcer import Enforcer
    from cortex.governance.evidence import ControlMapping, EvidenceCollector
    from cortex.governance.policy import PolicyEngine
    from cortex.governance.risk_scorer import RiskScorer

    return Enforcer(
        policy_engine=PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR),
        risk_scorer=RiskScorer(),
        evidence_collector=EvidenceCollector(
            control_mapping=ControlMapping(DEFAULT_CONTROLS_MAPPING)
        ),
    )
def cmd_evaluate(args: argparse.Namespace) -> None:
    """Full governance evaluation: policy + risk + evidence.

    Builds a context from CLI args, runs it through the Enforcer, and prints
    the decision as JSON (--json) or human-readable text.
    """
    enforcer = _get_enforcer()
    ctx = _build_context(args)
    decision = enforcer.evaluate(ctx)
    if args.json:
        print(json.dumps({
            "verdict": decision.verdict,
            "reason": decision.reason,
            "risk_score": decision.risk.value,
            "risk_level": decision.risk.level,
            "risk_factors": decision.risk.factors,
            "policy_result": decision.policy_result,
        }, indent=2))
    else:
        # NOTE(review): the approve/deny icons are empty strings — they look
        # like emoji lost in a re-encoding pass; confirm and restore if so.
        icon = {"approve": "", "deny": "", "escalate": "⚠️"}.get(decision.verdict, "")
        print(f"{icon} Verdict: {decision.verdict.upper()}")
        print(f" Reason: {decision.reason}")
        print(f" Risk: {decision.risk.value}/10 ({decision.risk.level})")
        # Each factor detail presumably is a dict with 'value'/'score' keys;
        # the .get fallback prints the raw detail otherwise — TODO confirm shape.
        for factor, detail in decision.risk.factors.items():
            print(f"{factor}: {detail.get('value', detail)} (+{detail.get('score', 0)})")
def cmd_risk(args: argparse.Namespace) -> None:
    """Risk scoring only — no policy evaluation, no evidence recording."""
    scorer = _get_scorer()
    result = scorer.score(_build_context(args))
    if args.json:
        payload = {
            "risk_score": result.value,
            "risk_level": result.level,
            "acceptable": result.is_acceptable,
            "factors": result.factors,
        }
        print(json.dumps(payload, indent=2))
        return
    icon = "🟢" if result.is_acceptable else "🔴"
    print(f"{icon} Risk Score: {result.value}/10 ({result.level})")
    print(f" Acceptable: {'yes' if result.is_acceptable else 'NO'}")
    for factor, detail in result.factors.items():
        print(f"{factor}: {detail.get('value', detail)} (+{detail.get('score', 0)})")
def cmd_policies(args: argparse.Namespace) -> None:
    """List loaded policies with their rules."""
    engine = _get_engine()
    if not engine.policies:
        print("No policies loaded.")
        return
    for policy in engine.policies:
        print(f"📋 {policy.name} (v{policy.version})")
        print(f" {policy.description}")
        print(f" Rules: {len(policy.rules)}")
        for rule in policy.rules:
            # NOTE(review): rule name and effect are concatenated with no
            # separator — a " -> " (or similar) between them was likely lost;
            # confirm against the pre-merge output format.
            print(f"{rule.name}{rule.effect} (priority: {rule.priority})")
        print()
def cmd_status(args: argparse.Namespace) -> None:
    """Show governance system status: paths plus loaded policy/rule counts."""
    engine = _get_engine()
    # NOTE(review): scorer is constructed but never used below — either drop
    # it or report something about it (e.g. its configuration).
    scorer = _get_scorer()
    policies_count = len(engine.policies)
    rules_count = sum(len(p.rules) for p in engine.policies)
    policies_dir = DEFAULT_POLICIES_DIR
    controls_file = DEFAULT_CONTROLS_MAPPING
    print("🛡️ Darkplex Governance Status")
    print(f" Policies dir: {policies_dir}")
    print(f" Controls map: {controls_file}")
    print(f" Policies loaded: {policies_count}")
    print(f" Total rules: {rules_count}")
    # NOTE(review): the exists-indicators are empty strings for both branches —
    # emoji (e.g. ✅/❌) were likely lost in re-encoding; confirm and restore.
    print(f" Policies dir exists: {'' if Path(policies_dir).exists() else ''}")
    print(f" Controls file exists: {'' if Path(controls_file).exists() else ''}")
def cmd_report(args: argparse.Namespace) -> None:
    """Generate a compliance report (placeholder — needs live evidence).

    With --agent a per-agent report is produced; otherwise a full compliance
    report. Output goes to the --output path as a file, or to stdout.
    """
    # Package-qualified imports: after the darkplex-core merge these modules
    # live under cortex/governance/, so bare 'governance.' no longer resolves.
    from cortex.governance.evidence import ControlMapping, EvidenceCollector
    from cortex.governance.report_generator import ReportGenerator

    collector = EvidenceCollector(
        control_mapping=ControlMapping(DEFAULT_CONTROLS_MAPPING)
    )
    generator = ReportGenerator(collector)
    if args.agent:
        report = generator.generate_agent_report(args.agent)
    else:
        report = generator.generate_compliance_report()
    output = json.dumps(report, indent=2)
    if args.output:
        Path(args.output).write_text(output)
        print(f"✅ Report written to {args.output}")
    else:
        print(output)
def main() -> None:
    """CLI entry point: parse arguments and dispatch to the cmd_* handlers."""
    parser = argparse.ArgumentParser(prog="darkplex governance", description="Governance Engine")
    # --json is accepted both before and after the subcommand; the subparsers
    # re-declare it with the same dest, so either position sets args.json.
    parser.add_argument("--json", action="store_true", help="JSON output")
    sub = parser.add_subparsers(dest="subcmd")
    # evaluate — full policy + risk + evidence pipeline
    p_eval = sub.add_parser("evaluate", aliases=["eval"], help="Full policy + risk evaluation")
    p_eval.add_argument("--agent", required=True)
    p_eval.add_argument("--action", required=True)
    p_eval.add_argument("--data-type", default="public", choices=["public", "internal", "confidential", "restricted"])
    p_eval.add_argument("--target", default="internal", choices=["internal", "external"])
    p_eval.add_argument("--role", default="assistant", choices=["admin", "operator", "assistant", "external"])
    p_eval.add_argument("--json", action="store_true", dest="json")
    # risk — scoring only; agent/action default to "unknown" instead of required
    p_risk = sub.add_parser("risk", help="Risk scoring only")
    p_risk.add_argument("--agent", default="unknown")
    p_risk.add_argument("--action", default="unknown")
    p_risk.add_argument("--data-type", default="public", choices=["public", "internal", "confidential", "restricted"])
    p_risk.add_argument("--target", default="internal", choices=["internal", "external"])
    p_risk.add_argument("--role", default="assistant", choices=["admin", "operator", "assistant", "external"])
    p_risk.add_argument("--json", action="store_true", dest="json")
    # policies — list loaded policies
    p_pol = sub.add_parser("policies", help="List loaded policies")
    # NOTE(review): --reload is parsed but never acted upon in cmd_policies.
    p_pol.add_argument("--reload", action="store_true")
    # status — paths and counts
    sub.add_parser("status", help="Show governance status")
    # report — compliance report, optionally scoped to one agent
    p_rep = sub.add_parser("report", help="Generate compliance report")
    p_rep.add_argument("--agent", default=None)
    p_rep.add_argument("--output", "-o", default=None)
    p_rep.add_argument("--json", action="store_true", dest="json")
    args = parser.parse_args()
    # Dispatch; the "eval" alias maps to cmd_evaluate.
    if args.subcmd in ("evaluate", "eval"):
        cmd_evaluate(args)
    elif args.subcmd == "risk":
        cmd_risk(args)
    elif args.subcmd == "policies":
        cmd_policies(args)
    elif args.subcmd == "status":
        cmd_status(args)
    elif args.subcmd == "report":
        cmd_report(args)
    else:
        # No (or unknown) subcommand: show help and exit non-zero.
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,70 @@
# ISO 27001 Annex A Controls → NATS Event Streams Mapping
#
# Maps governance event types to ISO 27001:2022 Annex A controls.
# Used by the Evidence Collector to tag evidence with applicable controls.
version: "1.0.0"
mappings:
  # A.5.1-4 — Organizational controls: information security policies & roles
  - controls:
      - "A.5.1"   # Policies for information security
      - "A.5.2"   # Information security roles and responsibilities
      - "A.5.4"   # Management responsibilities
    event_types:
      - policy_evaluation
      - policy_update
      - policy_violation
    nats_subjects:
      - "governance.policy.>"
  # A.5.10, A.5.12-13 — Acceptable use, classification, labelling
  - controls:
      - "A.5.10"  # Acceptable use of information
      - "A.5.12"  # Classification of information
      - "A.5.13"  # Labelling of information
    event_types:
      - data_access
      - data_classification
      - data_export
    nats_subjects:
      - "governance.data.>"
  # A.8 — Technological controls: endpoints, privileged access, monitoring
  - controls:
      - "A.8.1"   # User endpoint devices
      - "A.8.2"   # Privileged access rights
      - "A.8.5"   # Secure authentication
      - "A.8.9"   # Configuration management
      - "A.8.16"  # Monitoring activities
    event_types:
      - agent_authentication
      - agent_action
      - system_configuration
      - monitoring_alert
    nats_subjects:
      - "governance.agent.>"
      - "governance.system.>"
  # A.5.15-18 — Access control & identity management
  # (2022 numbering; this was Annex A.9 in ISO 27001:2013)
  - controls:
      - "A.5.15"  # Access control
      - "A.5.16"  # Identity management
      - "A.5.17"  # Authentication information
      - "A.5.18"  # Access rights
    event_types:
      - access_request
      - access_granted
      - access_denied
      - role_change
    nats_subjects:
      - "governance.access.>"
  # A.5.23 — Supplier / cloud services
  - controls:
      - "A.5.23"  # Information security for cloud services
    event_types:
      - external_api_call
      - cloud_service_access
    nats_subjects:
      - "governance.external.>"

View file

@ -0,0 +1,129 @@
"""Runtime Enforcer: pre-execution policy check (approve/deny/escalate).
The enforcer is the single entry point for all agent action governance.
It orchestrates the policy engine, risk scorer, and evidence collector.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any
from governance.evidence import EvidenceCollector
from governance.policy import PolicyEngine
from governance.risk_scorer import RiskResult, RiskScorer
logger = logging.getLogger(__name__)
@dataclass
class Decision:
    """The final governance decision for an agent action.

    Attributes:
        verdict: one of "approve", "deny", "escalate"
        reason: human-readable explanation of the verdict
        risk: the RiskResult produced by the risk scorer
        policy_result: raw result dict from the policy engine
    """

    verdict: str
    reason: str
    risk: RiskResult
    policy_result: dict[str, Any]

    @property
    def approved(self) -> bool:
        """True only for an outright approval verdict."""
        return self.verdict == "approve"
class Enforcer:
    """Pre-execution governance enforcer.

    Evaluates every agent action against policies and risk scoring,
    records evidence, and returns a decision.

    Usage:
        enforcer = Enforcer(policy_engine, risk_scorer, evidence_collector)
        decision = enforcer.evaluate({"agent": "claudia", "action": "send_email", ...})
        if decision.approved:
            execute_action()
    """

    # Risk levels that override policy to deny/escalate.
    RISK_OVERRIDES: dict[str, str] = {
        "critical": "deny",
        "high": "escalate",
    }

    def __init__(
        self,
        policy_engine: PolicyEngine | None = None,
        risk_scorer: RiskScorer | None = None,
        evidence_collector: EvidenceCollector | None = None,
    ) -> None:
        # Each collaborator defaults to a freshly-constructed instance so the
        # enforcer works out of the box in tests and simple deployments.
        self.policy_engine = policy_engine or PolicyEngine()
        self.risk_scorer = risk_scorer or RiskScorer()
        self.evidence_collector = evidence_collector or EvidenceCollector()

    def evaluate(self, context: dict[str, Any]) -> Decision:
        """Evaluate an agent action and return a governance decision.

        Args:
            context: Action context dict with keys like:
                - agent: agent identifier
                - action: action name
                - data_type / data_classification: data sensitivity
                - target: internal/external
                - agent_role: role of the requesting agent
                - hour: time of day (optional)

        Returns:
            Decision with verdict, reason, risk score, and policy result.
        """
        # Normalize data_type: data_classification is accepted as an alias.
        if "data_classification" in context and "data_type" not in context:
            context["data_type"] = context["data_classification"]
        # Step 1: Risk scoring
        risk = self.risk_scorer.score(context)
        # Step 2: Policy evaluation
        policy_result = self.policy_engine.evaluate(context)
        policy_verdict = policy_result["verdict"]
        # Step 3: Combine — risk can override policy to be MORE restrictive.
        verdict = policy_verdict
        reason = policy_result["reason"]
        risk_override = self.RISK_OVERRIDES.get(risk.level)
        if risk_override:
            # Lower strictness value = more restrictive. The override only
            # applies when it is stricter than the policy verdict.
            # NOTE(review): this table uses "allow" while Decision documents
            # "approve" — both hit the .get default of 2, so the comparison
            # still behaves, but the verdict vocabulary should be unified.
            # Confirm which term the policy engine actually emits.
            strictness = {"deny": 0, "escalate": 1, "allow": 2}
            if strictness.get(risk_override, 2) < strictness.get(verdict, 2):
                verdict = risk_override
                reason = f"Risk override ({risk.level}): {reason}"
        # Step 4: Record evidence for audit reporting.
        agent = context.get("agent", "unknown")
        action = context.get("action", "unknown")
        self.evidence_collector.record(
            event_type="policy_evaluation",
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk.value,
            risk_level=risk.level,
            details={
                "context": context,
                "policy_result": policy_result,
                "risk_factors": risk.factors,
            },
        )
        decision = Decision(
            verdict=verdict,
            reason=reason,
            risk=risk,
            policy_result=policy_result,
        )
        # NOTE(review): "%s%s" joins agent/action and verdict without a
        # separator — a " -> " was probably lost in re-encoding; confirm.
        logger.info(
            "Enforcer decision: %s%s (risk: %d/%s)",
            f"{agent}/{action}", verdict, risk.value, risk.level,
        )
        return decision

View file

@ -0,0 +1,153 @@
"""Evidence Collector: NATS JetStream events → ISO 27001 control mapping.
Collects governance events from NATS, maps them to ISO 27001 Annex A controls,
and stores evidence for audit reporting.
"""
from __future__ import annotations
import json
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
import yaml
logger = logging.getLogger(__name__)
@dataclass
class EvidenceRecord:
    """A single piece of compliance evidence.

    Captures one governance event together with its verdict, risk assessment,
    and the ISO 27001 Annex A controls it provides evidence for.
    """

    timestamp: str               # ISO-8601 timestamp
    event_type: str              # e.g. policy_evaluation, access_request
    agent: str                   # acting agent identifier
    action: str                  # action that was evaluated
    verdict: str                 # policy verdict for the action
    risk_score: int              # numeric risk score
    risk_level: str              # risk level label
    controls: list[str]          # ISO 27001 control IDs this evidences
    details: dict[str, Any] = field(default_factory=dict)  # extra context

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict of all fields, in declaration order."""
        return dict(
            timestamp=self.timestamp,
            event_type=self.event_type,
            agent=self.agent,
            action=self.action,
            verdict=self.verdict,
            risk_score=self.risk_score,
            risk_level=self.risk_level,
            controls=self.controls,
            details=self.details,
        )
class ControlMapping:
    """Maps governance event types to ISO 27001 Annex A control IDs."""

    def __init__(self, mapping_path: str | None = None) -> None:
        # event_type -> list of control IDs
        self.mapping: dict[str, list[str]] = {}
        resolved = mapping_path or os.environ.get(
            "GOVERNANCE_CONTROLS_MAPPING", "controls/iso27001-mapping.yaml"
        )
        self._load_mapping(resolved)

    def _load_mapping(self, path: str) -> None:
        """Load the event-type -> controls mapping from the YAML file at *path*."""
        try:
            with open(path, "r") as fh:
                data = yaml.safe_load(fh)
            for entry in data.get("mappings", []):
                controls = entry.get("controls", [])
                for event_type in entry.get("event_types", []):
                    self.mapping.setdefault(event_type, []).extend(controls)
            logger.info("Loaded %d event type mappings", len(self.mapping))
        except FileNotFoundError:
            # A missing mapping is tolerated: evidence is recorded untagged.
            logger.warning("Control mapping not found: %s", path)
        except Exception:
            logger.exception("Failed to load control mapping: %s", path)

    def get_controls(self, event_type: str) -> list[str]:
        """Return the ISO 27001 controls applicable to *event_type* ([] if unmapped)."""
        return self.mapping.get(event_type, [])
class EvidenceCollector:
    """Collects and stores governance evidence from agent actions.

    In production this subscribes to NATS JetStream; in tests, evidence is
    recorded directly via record().

    Usage:
        collector = EvidenceCollector()
        collector.record(event_type="policy_evaluation", agent="claudia", ...)
    """

    def __init__(self, control_mapping: ControlMapping | None = None) -> None:
        self.control_mapping = control_mapping or ControlMapping()
        # In-memory evidence store, newest last.
        self.evidence: list[EvidenceRecord] = []

    def record(
        self,
        event_type: str,
        agent: str,
        action: str,
        verdict: str,
        risk_score: int = 0,
        risk_level: str = "low",
        details: dict[str, Any] | None = None,
    ) -> EvidenceRecord:
        """Record one governance evidence entry and return it.

        Args:
            event_type: Type of governance event (e.g. policy_evaluation, access_request)
            agent: Agent identifier
            action: Action being performed
            verdict: Policy verdict (allow/deny/escalate)
            risk_score: Numeric risk score (0-10)
            risk_level: Risk level string
            details: Additional context
        """
        entry = EvidenceRecord(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event_type=event_type,
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk_score,
            risk_level=risk_level,
            # Tag with every ISO 27001 control applicable to this event type.
            controls=self.control_mapping.get_controls(event_type),
            details=details or {},
        )
        self.evidence.append(entry)
        # NOTE(review): "%s%s" prints agent and verdict with no separator —
        # one was probably lost in re-encoding; confirm the intended format.
        logger.info(
            "Evidence recorded: %s by %s%s (risk: %d/%s, controls: %s)",
            action, agent, verdict, risk_score, risk_level, entry.controls,
        )
        return entry

    def get_evidence(
        self,
        agent: str | None = None,
        control: str | None = None,
        verdict: str | None = None,
    ) -> list[EvidenceRecord]:
        """Query evidence, narrowed by any combination of the given filters."""
        selected = self.evidence
        if agent:
            selected = [rec for rec in selected if rec.agent == agent]
        if control:
            selected = [rec for rec in selected if control in rec.controls]
        if verdict:
            selected = [rec for rec in selected if rec.verdict == verdict]
        return selected

    def export_json(self) -> str:
        """Export all evidence as a pretty-printed JSON array."""
        return json.dumps([rec.to_dict() for rec in self.evidence], indent=2)

View file

@ -0,0 +1,46 @@
# Data Access Policy
# Defines who can access which data classifications.
# Effects: allow / deny / escalate (see schema.yaml for the rule structure).
# Restrictive rules carry the highest priority values here.
name: data-access
description: Controls agent access to data based on classification and role
version: "1.0.0"
rules:
  # Hard denials first (priority 100).
  - name: deny-external-restricted
    description: External agents cannot access restricted data
    conditions:
      agent_role: external
      data_type: restricted
    effect: deny
    priority: 100
  - name: escalate-confidential-external
    description: Confidential data going external requires escalation
    conditions:
      data_type: confidential
      target: external
    effect: escalate
    priority: 90
  - name: deny-restricted-external
    description: Restricted data must never leave internal systems
    conditions:
      data_type: restricted
      target: external
    effect: deny
    priority: 100
  # Permissive defaults (low priority).
  - name: allow-public-any
    description: Public data can be accessed by anyone
    conditions:
      data_type: public
    effect: allow
    priority: 10
  - name: allow-internal-internal
    description: Internal data accessible within internal systems
    conditions:
      data_type: internal
      target: internal
    effect: allow
    priority: 50

View file

@ -0,0 +1,40 @@
# External Communications Policy
# Controls when and how agents can communicate externally.
name: external-comms
description: Governs agent communication with external systems and parties
version: "1.0.0"
rules:
- name: deny-assistant-external-email
description: Assistants cannot send external emails without escalation
conditions:
agent_role: assistant
action: send_email
target: external
effect: escalate
priority: 80
- name: allow-operator-external
description: Operators may communicate externally
conditions:
agent_role: operator
target: external
effect: allow
priority: 70
- name: deny-external-api-restricted
description: No external API calls with restricted data
conditions:
action: api_call
target: external
data_type: restricted
effect: deny
priority: 100
- name: allow-internal-comms
description: Internal communication is always allowed
conditions:
target: internal
effect: allow
priority: 10

View file

@ -0,0 +1,42 @@
# Financial Data Policy (BaFin-relevant)
# Strict controls for financial data handling.
name: financial-data
description: BaFin-compliant financial data governance
version: "1.0.0"
rules:
- name: deny-financial-external
description: Financial data must not leave internal systems
conditions:
data_type: restricted
action: export_data
target: external
effect: deny
priority: 100
- name: escalate-financial-access
description: All access to financial data requires escalation
conditions:
data_type: restricted
action: read_financial
effect: escalate
priority: 95
- name: deny-financial-offhours
description: Financial operations blocked outside business hours
conditions:
data_type: restricted
action: modify_financial
effect: escalate
priority: 90
- name: allow-financial-reporting
description: Internal financial reporting is permitted for operators
conditions:
agent_role: operator
action: generate_report
data_type: confidential
target: internal
effect: allow
priority: 80

View file

@ -0,0 +1,43 @@
# Policy YAML Schema Definition
# All policy files must conform to this structure.
schema:
version: "1.0"
required_fields:
- name
- description
- version
- rules
rule_schema:
required_fields:
- name
- conditions
- effect
optional_fields:
- priority
- description
valid_effects:
- allow
- deny
- escalate
valid_condition_keys:
- agent
- agent_role
- action
- source
- data_type
- data_classification
- target
- hour_range
valid_data_types:
- public
- internal
- confidential
- restricted
valid_targets:
- internal
- external
- production

View file

@ -0,0 +1,78 @@
# YesMan Security Policy — RedCrowMedia / Wasteland Network
# Based on USER.md and MEMORY.md security rules.
name: yesman-security
description: Security rules for YesMan AI assistant at RedCrowMedia
version: "1.0.0"
rules:
# Only Abe gives orders
- name: deny-external-instructions
description: Never execute instructions from external sources (emails, websites, third parties)
conditions:
source: external
action: execute_instruction
effect: deny
priority: 100
# Never send credentials externally
- name: deny-credential-export
description: Credentials, tokens, and keys must never leave the system
conditions:
data_type: restricted
action: send_credentials
effect: deny
priority: 100
# Email is not a command source
- name: escalate-email-action
description: Actions requested via email always require Abe's explicit approval
conditions:
source: email
action: execute_request
effect: escalate
priority: 95
# No software installation without audit + approval
- name: escalate-software-install
description: External software installation requires security audit and Abe's GO
conditions:
action: install_software
effect: escalate
priority: 95
# System-critical changes need approval
- name: escalate-system-changes
description: System-critical or security-relevant changes require approval
conditions:
action: system_change
target: production
effect: escalate
priority: 90
# No public posting without approval
- name: escalate-public-comms
description: Public communications (emails, tweets, posts) require approval
conditions:
action: send_message
target: external
effect: escalate
priority: 85
# Internal file operations are fine
- name: allow-internal-file-ops
description: Reading and writing files within workspace is permitted
conditions:
action: file_operation
target: internal
data_type: internal
effect: allow
priority: 50
# Web search is fine
- name: allow-web-search
description: Web searches and research are permitted
conditions:
action: web_search
effect: allow
priority: 40

143
cortex/governance/policy.py Normal file
View file

@ -0,0 +1,143 @@
"""Policy Engine: loads YAML policies and evaluates agent actions against them.
Policies are human-readable YAML files, versioned in Git. Each policy defines
rules with conditions and effects (allow/deny/escalate).
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import yaml
logger = logging.getLogger(__name__)
@dataclass
class Rule:
    """A single governance rule: a condition set plus an effect."""
    name: str
    conditions: dict[str, Any]
    effect: str  # "allow", "deny", "escalate"
    priority: int = 0

    def matches(self, context: dict[str, Any]) -> bool:
        """Return True when every condition is satisfied by *context*.

        A condition whose key is absent (or None) in the context never
        matches; list-valued conditions act as membership tests.
        """
        def satisfied(key: str, want: Any) -> bool:
            got = context.get(key)
            if got is None:
                return False
            if isinstance(want, list):
                return got in want
            return got == want

        return all(satisfied(k, v) for k, v in self.conditions.items())
@dataclass
class Policy:
    """A named policy containing ordered rules."""
    name: str         # policy identifier (YAML 'name', falls back to file stem)
    description: str  # human-readable summary from the YAML file
    version: str      # policy version string (defaults to "1.0.0" when absent)
    rules: list[Rule] = field(default_factory=list)
class PolicyEngine:
    """Loads and evaluates YAML-based governance policies.

    Usage:
        engine = PolicyEngine(policies_dir="policies/")
        result = engine.evaluate(action_context)
    """

    def __init__(self, policies_dir: str | None = None) -> None:
        # Explicit argument wins; otherwise env var; otherwise "policies/".
        base = policies_dir or os.environ.get("GOVERNANCE_POLICIES_DIR", "policies/")
        self.policies_dir = Path(base)
        self.policies: list[Policy] = []
        self._load_policies()

    def _load_policies(self) -> None:
        """Load all YAML policy files from the policies directory."""
        if not self.policies_dir.exists():
            logger.warning("Policies directory not found: %s", self.policies_dir)
            return
        # Deterministic load order; schema.yaml describes policies, it isn't one.
        for path in sorted(self.policies_dir.glob("*.yaml")):
            if path.name == "schema.yaml":
                continue
            try:
                loaded = self._parse_policy(path)
                self.policies.append(loaded)
                logger.info("Loaded policy: %s (%d rules)", loaded.name, len(loaded.rules))
            except Exception:
                logger.exception("Failed to load policy: %s", path)

    def _parse_policy(self, path: Path) -> Policy:
        """Parse one YAML file into a Policy object."""
        with open(path, "r") as fh:
            raw = yaml.safe_load(fh)
        parsed_rules = [
            Rule(
                name=entry["name"],
                conditions=entry.get("conditions", {}),
                effect=entry.get("effect", "deny"),   # fail closed by default
                priority=entry.get("priority", 0),
            )
            for entry in raw.get("rules", [])
        ]
        return Policy(
            name=raw.get("name", path.stem),
            description=raw.get("description", ""),
            version=raw.get("version", "1.0.0"),
            rules=parsed_rules,
        )

    def evaluate(self, context: dict[str, Any]) -> dict[str, Any]:
        """Evaluate an action context against all loaded policies.

        Returns the highest-priority matching rule's effect, or 'allow' if
        no rules match. Ties on priority are broken by strictness
        (deny > escalate > allow).
        """
        hits: list[tuple[Rule, Policy]] = [
            (rule, policy)
            for policy in self.policies
            for rule in policy.rules
            if rule.matches(context)
        ]
        if not hits:
            return {
                "verdict": "allow",
                "reason": "No matching policy rules",
                "matched_rules": [],
            }
        strictness = {"deny": 0, "escalate": 1, "allow": 2}
        hits.sort(key=lambda pair: (-pair[0].priority, strictness.get(pair[0].effect, 2)))
        winner, owner = hits[0]
        return {
            "verdict": winner.effect,
            "reason": f"Policy '{owner.name}', rule '{winner.name}'",
            "matched_rules": [
                {"policy": p.name, "rule": r.name, "effect": r.effect}
                for r, p in hits
            ],
        }

    def reload(self) -> None:
        """Reload all policies from disk."""
        self.policies.clear()
        self._load_policies()

View file

@ -0,0 +1,109 @@
"""Audit Report Generator: creates compliance reports from collected evidence.
Generates structured reports grouped by ISO 27001 controls, time periods,
and agent activity.
"""
from __future__ import annotations
import json
import logging
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
from governance.evidence import EvidenceCollector, EvidenceRecord
logger = logging.getLogger(__name__)
@dataclass
class ReportSection:
    """A section of an audit report."""
    title: str
    entries: list[dict[str, Any]]
    summary: dict[str, Any]
    # NOTE(review): ReportGenerator builds sections as plain dicts and never
    # instantiates this class — confirm intent before removing it.
class ReportGenerator:
    """Generates audit reports from governance evidence.

    Usage:
        generator = ReportGenerator(evidence_collector)
        report = generator.generate_compliance_report()
    """

    def __init__(self, evidence_collector: EvidenceCollector) -> None:
        self.collector = evidence_collector

    def generate_compliance_report(self) -> dict[str, Any]:
        """Generate a full compliance report grouped by ISO 27001 controls."""
        records = self.collector.evidence
        if not records:
            return {"generated_at": _now_iso(), "status": "no_evidence", "sections": []}

        # Bucket every record under each control it touches (records may
        # carry several controls, so buckets can overlap).
        grouped: dict[str, list[EvidenceRecord]] = defaultdict(list)
        for rec in records:
            for ctrl in rec.controls:
                grouped[ctrl].append(rec)

        sections = [
            {
                "control": ctrl,
                "total_events": len(grouped[ctrl]),
                "verdicts": _count_verdicts(grouped[ctrl]),
                "risk_distribution": _count_risk_levels(grouped[ctrl]),
                "agents": list({r.agent for r in grouped[ctrl]}),
            }
            for ctrl in sorted(grouped)
        ]
        return {
            "generated_at": _now_iso(),
            "total_evidence": len(records),
            "controls_covered": list(sorted(grouped.keys())),
            "summary": {
                # sum() over booleans counts the matching records
                "total_deny": sum(r.verdict == "deny" for r in records),
                "total_escalate": sum(r.verdict == "escalate" for r in records),
                "total_allow": sum(r.verdict == "allow" for r in records),
                "high_risk_events": sum(r.risk_score >= 7 for r in records),
            },
            "sections": sections,
        }

    def generate_agent_report(self, agent: str) -> dict[str, Any]:
        """Generate a report for a specific agent."""
        records = self.collector.get_evidence(agent=agent)
        return {
            "generated_at": _now_iso(),
            "agent": agent,
            "total_actions": len(records),
            "verdicts": _count_verdicts(records),
            "risk_distribution": _count_risk_levels(records),
            "actions": [r.to_dict() for r in records],
        }

    def export_json(self) -> str:
        """Export the compliance report as formatted JSON."""
        return json.dumps(self.generate_compliance_report(), indent=2)
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _count_verdicts(records: list[EvidenceRecord]) -> dict[str, int]:
counts: dict[str, int] = defaultdict(int)
for r in records:
counts[r.verdict] += 1
return dict(counts)
def _count_risk_levels(records: list[EvidenceRecord]) -> dict[str, int]:
counts: dict[str, int] = defaultdict(int)
for r in records:
counts[r.risk_level] += 1
return dict(counts)

View file

@ -0,0 +1,126 @@
"""Risk Scorer: context-based risk scoring for agent actions.
Risk levels:
- low (0-3): routine operations
- elevated (4-6): notable but acceptable
- high (7-8): requires escalation
- critical (9-10): auto-deny + alert
Factors: data classification, target (internal/external), agent role, time of day.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
logger = logging.getLogger(__name__)
# Data classification weights — more sensitive data adds more risk points.
DATA_WEIGHTS: dict[str, int] = {
    "public": 0,
    "internal": 2,
    "confidential": 5,
    "restricted": 8,
}

# Target weights — external exposure raises risk.
TARGET_WEIGHTS: dict[str, int] = {
    "internal": 0,
    "external": 3,
}

# Agent role weights (lower = more trusted); admins subtract a point.
ROLE_WEIGHTS: dict[str, int] = {
    "admin": -1,
    "operator": 0,
    "assistant": 1,
    "external": 3,
}

# Flat bonus added outside business hours (before 08:00 or from 18:00, UTC
# when the caller doesn't supply an hour — see RiskScorer.score).
OFF_HOURS_BONUS = 2
@dataclass
class RiskResult:
    """Result of a risk assessment."""
    value: int             # clamped 0-10 total score
    level: str             # low | elevated | high | critical
    factors: dict[str, Any]  # per-factor score breakdown

    @property
    def is_acceptable(self) -> bool:
        """True for scores that do not require escalation (low/elevated)."""
        return not self.value > 6
def _classify_level(score: int) -> str:
"""Map a numeric score to a risk level."""
if score <= 3:
return "low"
elif score <= 6:
return "elevated"
elif score <= 8:
return "high"
else:
return "critical"
class RiskScorer:
    """Calculates contextual risk scores for agent actions.

    Usage:
        scorer = RiskScorer()
        result = scorer.score({"data_type": "confidential", "target": "external"})
    """

    def score(self, context: dict[str, Any]) -> RiskResult:
        """Score an action context and return a RiskResult.

        Args:
            context: Dict with optional keys:
                - data_type: public|internal|confidential|restricted
                - target: internal|external
                - agent_role: admin|operator|assistant|external
                - hour: 0-23 (defaults to current hour UTC)
        """
        # Resolve inputs with the same defaults as documented above.
        data_type = context.get("data_type", "public")
        target = context.get("target", "internal")
        role = context.get("agent_role", "assistant")
        hour = context.get("hour")
        if hour is None:
            hour = datetime.now(timezone.utc).hour

        data_score = DATA_WEIGHTS.get(data_type, 0)
        target_score = TARGET_WEIGHTS.get(target, 0)
        role_score = ROLE_WEIGHTS.get(role, 1)  # unknown roles score like assistants
        is_off_hours = hour < 8 or hour >= 18
        time_score = OFF_HOURS_BONUS if is_off_hours else 0

        breakdown: dict[str, Any] = {
            "data_type": {"value": data_type, "score": data_score},
            "target": {"value": target, "score": target_score},
            "agent_role": {"value": role, "score": role_score},
            "time_of_day": {"hour": hour, "off_hours": is_off_hours, "score": time_score},
        }

        # Clamp the additive total into the 0-10 band before classifying.
        clamped = max(0, min(10, data_score + target_score + role_score + time_score))
        level = _classify_level(clamped)
        logger.debug("Risk score: %d (%s) — factors: %s", clamped, level, breakdown)
        return RiskResult(value=clamped, level=level, factors=breakdown)

View file

View file

@ -0,0 +1,193 @@
"""Proactive Intelligence: pattern-based predictions and anticipation.
Detects patterns in historical events and generates proactive alerts:
- SSL certificate expiry approaching
- Recurring issues (same error pattern at predictable intervals)
- Usage pattern anomalies
- Resource exhaustion trends
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Any, Callable
logger = logging.getLogger(__name__)
class AlertSeverity(Enum):
    """Severity of a proactive alert; ordering for output is applied in Anticipator.analyze."""
    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"
@dataclass
class Prediction:
    """A proactive prediction about a future event."""
    pattern_name: str   # name of the pattern that produced this prediction
    description: str    # human-readable summary
    severity: AlertSeverity
    predicted_time: datetime | None = None  # when the event is expected, if known
    confidence: float = 0.0  # 0.0-1.0
    recommended_action: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)  # pattern-specific details
@dataclass
class PatternDefinition:
    """Definition of a detectable pattern."""
    name: str
    description: str
    # Receives the full event list; returns a Prediction, or None when nothing matches.
    detector: Callable[[list[dict[str, Any]]], Prediction | None]
class Anticipator:
    """Proactive intelligence engine that detects patterns and generates predictions.

    Usage:
        anticipator = Anticipator()
        anticipator.register_pattern(ssl_expiry_pattern)
        predictions = anticipator.analyze(events)
    """

    def __init__(self) -> None:
        self.patterns: list[PatternDefinition] = []
        self._register_builtin_patterns()

    def register_pattern(self, pattern: PatternDefinition) -> None:
        """Add *pattern* to the active detector set."""
        self.patterns.append(pattern)
        logger.info("Registered pattern: %s", pattern.name)

    def analyze(self, events: list[dict[str, Any]]) -> list[Prediction]:
        """Analyze events against all registered patterns.

        Args:
            events: List of event dicts with at minimum 'timestamp', 'type', 'data'.

        Returns:
            List of predictions, sorted by severity (critical first),
            then by descending confidence.
        """
        found: list[Prediction] = []
        for pattern in self.patterns:
            try:
                hit = pattern.detector(events)
            except Exception:
                # One broken detector must not abort the whole analysis.
                logger.exception("Pattern detector failed: %s", pattern.name)
                continue
            if hit is None:
                continue
            found.append(hit)
            logger.info(
                "Pattern detected: %s (severity: %s, confidence: %.2f)",
                hit.pattern_name,
                hit.severity.value,
                hit.confidence,
            )
        rank = {AlertSeverity.CRITICAL: 0, AlertSeverity.WARNING: 1, AlertSeverity.INFO: 2}
        found.sort(key=lambda p: (rank.get(p.severity, 3), -p.confidence))
        return found

    def _register_builtin_patterns(self) -> None:
        """Register built-in pattern detectors."""
        builtins = (
            ("ssl_cert_expiry", "Detects SSL certificates approaching expiry", _detect_ssl_expiry),
            ("recurring_error", "Detects recurring error patterns", _detect_recurring_errors),
            ("usage_spike", "Detects unusual usage spikes", _detect_usage_spike),
        )
        for name, desc, fn in builtins:
            self.register_pattern(PatternDefinition(name=name, description=desc, detector=fn))
def _detect_ssl_expiry(events: list[dict[str, Any]]) -> Prediction | None:
    """Flag the first SSL certificate that will expire within 14 days."""
    now = datetime.now(timezone.utc)
    horizon = timedelta(days=14)
    for ev in events:
        if ev.get("type") != "ssl_cert_check":
            continue
        raw_expiry = ev.get("data", {}).get("expiry")
        if not raw_expiry:
            continue
        try:
            expiry = datetime.fromisoformat(raw_expiry)
        except (ValueError, TypeError):
            continue
        if expiry.tzinfo is None:
            # Naive timestamps are treated as UTC.
            expiry = expiry.replace(tzinfo=timezone.utc)
        remaining = expiry - now
        if remaining >= horizon:
            continue
        domain = ev.get("data", {}).get("domain", "unknown")
        severity = AlertSeverity.CRITICAL if remaining.days < 3 else AlertSeverity.WARNING
        return Prediction(
            pattern_name="ssl_cert_expiry",
            description=f"SSL certificate for {domain} expires in {remaining.days} days",
            severity=severity,
            predicted_time=expiry,
            confidence=0.95,
            recommended_action=f"Renew SSL certificate for {domain}",
            metadata={"domain": domain, "days_remaining": remaining.days},
        )
    return None
def _detect_recurring_errors(events: list[dict[str, Any]]) -> Prediction | None:
    """Flag the first error type that appears 3 or more times."""
    tallies: dict[str, int] = {}
    for ev in events:
        if ev.get("type") != "error":
            continue
        kind = ev.get("data", {}).get("error_type", "unknown")
        tallies[kind] = tallies.get(kind, 0) + 1
    for error_type, count in tallies.items():
        if count < 3:
            continue
        return Prediction(
            pattern_name="recurring_error",
            description=f"Recurring error '{error_type}' detected ({count} occurrences)",
            severity=AlertSeverity.WARNING,
            # Confidence grows with occurrences but saturates at 0.95.
            confidence=min(0.5 + count * 0.1, 0.95),
            recommended_action=f"Investigate root cause of '{error_type}'",
            metadata={"error_type": error_type, "count": count},
        )
    return None
def _detect_usage_spike(events: list[dict[str, Any]]) -> Prediction | None:
    """Flag when the average of the last 3 usage samples exceeds 2x the overall average."""
    samples = [
        ev.get("data", {}).get("value", 0)
        for ev in events
        if ev.get("type") == "usage_metric"
    ]
    if len(samples) < 10:
        # Too little history to call anything a spike.
        return None
    overall_avg = sum(samples) / len(samples)
    tail = samples[-3:] if len(samples) >= 3 else samples
    tail_avg = sum(tail) / len(tail) if tail else 0
    if overall_avg > 0 and tail_avg > overall_avg * 2:
        return Prediction(
            pattern_name="usage_spike",
            description=f"Usage spike detected: recent avg {tail_avg:.1f} vs overall {overall_avg:.1f}",
            severity=AlertSeverity.WARNING,
            confidence=0.7,
            recommended_action="Investigate usage spike — potential anomaly or load increase",
            metadata={"average": overall_avg, "recent_average": tail_avg, "ratio": tail_avg / overall_avg},
        )
    return None

View file

@ -0,0 +1,154 @@
"""Collective Learning: aggregates patterns across all internal agents.
Subscribes to the shared memory bus, collects insights from all
Vainplex-internal agents, and builds an aggregated knowledge base
for pattern detection and cross-agent learning.
🚨 STRICT DATA ISOLATION: Only Vainplex-internal agents participate.
No customer data. No customer agent insights. Ever.
"""
from __future__ import annotations
import json
import logging
import os
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from cortex.intelligence.shared_memory import ALLOWED_AGENTS, Insight, SharedMemory
logger = logging.getLogger(__name__)
@dataclass
class AggregatedPattern:
    """A pattern detected across multiple agents."""
    topic: str                      # shared-memory topic the insights arrived on
    description: str                # human-readable summary of the pattern
    contributing_agents: list[str]  # distinct internal agents that reported it
    confidence: float               # mean confidence of the contributing insights
    occurrence_count: int           # number of insights backing the pattern
    first_seen: str                 # earliest insight timestamp
    last_seen: str                  # latest insight timestamp
    insights: list[Insight] = field(default_factory=list)  # raw supporting insights
class CollectiveLearning:
    """Aggregates patterns from all internal agents into collective knowledge.

    Usage:
        collective = CollectiveLearning(shared_memory)
        await collective.start()
        patterns = collective.get_patterns()

    DATA ISOLATION: Only processes insights from ALLOWED_AGENTS.
    """
    def __init__(self, shared_memory: SharedMemory) -> None:
        self.shared_memory = shared_memory
        # topic -> every insight received on that topic (internal agents only)
        self._insights_by_topic: dict[str, list[Insight]] = defaultdict(list)
        # Latest analysis result; fully rebuilt on each incoming insight.
        self._patterns: list[AggregatedPattern] = []
    async def start(self) -> None:
        """Start listening for insights on all topics."""
        # ">" presumably acts as a match-everything wildcard subject —
        # TODO confirm against SharedMemory.subscribe semantics.
        await self.shared_memory.subscribe(">", self._handle_insight)
        logger.info("Collective learning started — listening for insights")
    async def _handle_insight(self, insight: Insight) -> None:
        """Process an incoming insight."""
        # Double-check data isolation: drop anything not from an internal agent.
        if insight.agent not in ALLOWED_AGENTS:
            logger.warning("Rejected insight from non-internal agent: %s", insight.agent)
            return
        self._insights_by_topic[insight.topic].append(insight)
        logger.debug(
            "Collected insight: %s from %s (topic: %s)",
            insight.content[:60], insight.agent, insight.topic,
        )
        # Re-analyze patterns when new data arrives
        self._detect_patterns()
    def _detect_patterns(self) -> None:
        """Analyze collected insights to find cross-agent patterns."""
        new_patterns: list[AggregatedPattern] = []
        for topic, insights in self._insights_by_topic.items():
            if len(insights) < 2:
                continue
            agents = list({i.agent for i in insights})
            if len(agents) < 2:
                # Single-agent observations aren't "collective" patterns
                continue
            timestamps = sorted(i.timestamp for i in insights)
            avg_confidence = sum(i.confidence for i in insights) / len(insights)
            pattern = AggregatedPattern(
                topic=topic,
                description=f"Cross-agent pattern on '{topic}' observed by {', '.join(agents)}",
                contributing_agents=agents,
                confidence=avg_confidence,
                occurrence_count=len(insights),
                first_seen=timestamps[0],
                last_seen=timestamps[-1],
                insights=insights,
            )
            new_patterns.append(pattern)
        # Replace wholesale rather than mutate, so readers never see a
        # half-updated list.
        self._patterns = new_patterns
    def get_patterns(
        self,
        topic: str | None = None,
        min_confidence: float = 0.0,
    ) -> list[AggregatedPattern]:
        """Retrieve detected collective patterns.

        Args:
            topic: Filter by topic (optional).
            min_confidence: Minimum confidence threshold.
        """
        patterns = self._patterns
        if topic:
            patterns = [p for p in patterns if p.topic == topic]
        if min_confidence > 0:
            patterns = [p for p in patterns if p.confidence >= min_confidence]
        return patterns
    def get_topic_summary(self) -> dict[str, Any]:
        """Get a summary of all topics and their insight counts."""
        return {
            topic: {
                "count": len(insights),
                "agents": list({i.agent for i in insights}),
                # Defensive: topics only exist once an insight arrived, so
                # `insights` should never be empty here.
                "latest": max(i.timestamp for i in insights) if insights else None,
            }
            for topic, insights in self._insights_by_topic.items()
        }
    def export_knowledge(self) -> str:
        """Export collective knowledge as JSON (patterns + topic summary)."""
        return json.dumps({
            "exported_at": datetime.now(timezone.utc).isoformat(),
            "allowed_agents": sorted(ALLOWED_AGENTS),
            "patterns": [
                {
                    "topic": p.topic,
                    "description": p.description,
                    "contributing_agents": p.contributing_agents,
                    "confidence": p.confidence,
                    "occurrence_count": p.occurrence_count,
                    "first_seen": p.first_seen,
                    "last_seen": p.last_seen,
                }
                for p in self._patterns
            ],
            "topics": self.get_topic_summary(),
        }, indent=2)

View file

@ -0,0 +1,420 @@
#!/usr/bin/env python3
"""Knowledge graph cleanup: classify unknowns, deduplicate entities, score relationships.
Usage:
darkplex cleanup [--classify] [--dedupe] [--score] [--dry-run]
If no flags given, runs all three steps.
"""
import argparse
import copy
import json
import logging
import os
import shutil
import sys
import time
from collections import defaultdict
from datetime import datetime, timedelta
from pathlib import Path
import requests
log = logging.getLogger("knowledge_cleanup")

# Knowledge-graph store locations under the user's home directory.
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
ENTITIES_PATH = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_PATH = KNOWLEDGE_DIR / "relationships.json"

# Local Ollama endpoint and model used for LLM-backed classification.
OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "qwen2.5:7b"

# Entity categories the LLM is allowed to assign (see classify_unknowns).
VALID_TYPES = {"person", "organization", "company", "project", "technology",
               "location", "event", "concept", "product"}
def backup(path: Path) -> Path:
    """Copy *path* next to itself with a timestamped suffix and return the copy."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    dest = path.with_suffix(f".backup_{stamp}.json")
    shutil.copy2(path, dest)  # copy2 preserves metadata
    log.info(f"Backed up {path.name}{dest.name}")
    return dest
def atomic_write(path: Path, data):
    """Serialize *data* as JSON to a sibling temp file, then atomically swap it in."""
    scratch = path.with_suffix(".tmp")
    scratch.write_text(json.dumps(data, ensure_ascii=False, indent=2))
    scratch.replace(path)  # same-directory rename: atomic on POSIX
    log.info(f"Wrote {path.name}")
def load_entities() -> dict:
    """Read the entities store from disk."""
    return json.loads(ENTITIES_PATH.read_text())


def load_relationships() -> dict:
    """Read the relationships store from disk."""
    return json.loads(RELATIONSHIPS_PATH.read_text())
def ollama_generate(prompt: str, timeout: int = 120) -> str:
    """Run *prompt* through the local Ollama model and return the response text.

    Raises requests.HTTPError on non-2xx status.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        # Low temperature: we want deterministic classification, not creativity.
        "options": {"temperature": 0.1, "num_predict": 2000},
    }
    reply = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=timeout)
    reply.raise_for_status()
    return reply.json().get("response", "")
# ─── Task 1: Classify Unknowns ───────────────────────────────────────────────
def classify_unknowns(entities: dict, dry_run: bool = False) -> dict:
    """Classify entities with type='unknown' using LLM.

    Sends entity names to Ollama in batches of 50, parses the JSON mapping
    the model returns, and applies valid categories in place (unless
    dry_run). Returns the (possibly mutated) entities dict.
    """
    unknowns = {k: v for k, v in entities.items()
                if isinstance(v, dict) and v.get("type") == "unknown"}
    if not unknowns:
        log.info("No unknown entities to classify.")
        return entities
    log.info(f"Classifying {len(unknowns)} unknown entities...")
    names = list(unknowns.keys())
    batch_size = 50
    results = {}
    for i in range(0, len(names), batch_size):
        batch = names[i:i + batch_size]
        batch_num = i // batch_size + 1
        total_batches = (len(names) + batch_size - 1) // batch_size  # ceil division
        log.info(f"Batch {batch_num}/{total_batches} ({len(batch)} entities)")
        # Number each name so the model can answer with index -> category.
        numbered = "\n".join(f"{j+1}. {name}" for j, name in enumerate(batch))
        prompt = f"""Classify each entity name into exactly one category.
Categories: person, organization, company, project, technology, location, event, concept, product
If a name looks like a person's first name only (e.g. "sarah", "thomas"), classify as person.
If it's a common word that isn't clearly an entity (e.g. "ahnung", "wir", "evtl", "schau"), classify as concept.
If unsure, classify as concept.
Respond with ONLY a JSON object mapping the number to the category. Example:
{{"1": "person", "2": "company", "3": "concept"}}
Entities:
{numbered}
JSON:"""
        try:
            response = ollama_generate(prompt)
            # Extract JSON from response: take the outermost {...} span in
            # case the model wraps it in prose.
            start = response.find("{")
            end = response.rfind("}") + 1
            if start >= 0 and end > start:
                parsed = json.loads(response[start:end])
                for idx_str, category in parsed.items():
                    idx = int(idx_str) - 1
                    if 0 <= idx < len(batch):
                        cat = category.strip().lower()
                        # Silently drop categories outside the whitelist.
                        if cat in VALID_TYPES:
                            results[batch[idx]] = cat
        except Exception as e:
            log.warning(f"Batch {batch_num} failed: {e}")
            continue  # NOTE: also skips the courtesy sleep below on failure
        time.sleep(0.5)  # Be nice to Ollama
    # Apply results
    stats = defaultdict(int)
    for name, new_type in results.items():
        old_type = entities[name].get("type", "unknown")
        if old_type != new_type:
            stats[f"{old_type}{new_type}"] += 1
            if not dry_run:
                entities[name]["type"] = new_type
                # Provenance fields so later audits can tell LLM-assigned types apart.
                entities[name]["classified_by"] = "llm_cleanup"
                entities[name]["classified_at"] = datetime.now().isoformat()
    log.info(f"Classified {len(results)}/{len(unknowns)} unknowns:")
    for transition, count in sorted(stats.items(), key=lambda x: -x[1]):
        log.info(f" {transition}: {count}")
    remaining = sum(1 for k, v in entities.items()
                    if isinstance(v, dict) and v.get("type") == "unknown")
    log.info(f"Remaining unknowns: {remaining}")
    return entities
# ─── Task 2: Deduplicate ─────────────────────────────────────────────────────
def find_duplicates(entities: dict) -> list:
    """Find duplicate entity groups via case-insensitive matching.

    Two signals are combined:
      1. exact matches after strip+lowercase normalization,
      2. word-boundary prefix containment (e.g. "mondo gate" vs "mondo gate ag").
    Returns a list of groups (lists of original names) to be merged.
    """
    # Group by normalized name
    groups = defaultdict(list)
    for name in entities:
        normalized = name.strip().lower()
        groups[normalized].append(name)
    # Also check for substring containment (e.g. "mondo gate" vs "mondo gate ag")
    names_lower = {name: name.strip().lower() for name in entities}
    sorted_names = sorted(names_lower.items(), key=lambda x: len(x[1]))  # shortest first
    # Find names where one is a prefix/substring of another.
    # NOTE(review): O(n^2) pairwise scan — acceptable for small graphs,
    # slow for very large entity sets.
    substring_pairs = []
    for i, (name_a, low_a) in enumerate(sorted_names):
        if len(low_a) < 3:
            continue  # too short to be a meaningful prefix
        for name_b, low_b in sorted_names[i+1:]:
            if low_a == low_b:
                continue
            # Require a following space/hyphen so "mond" doesn't match "mondo".
            if low_b.startswith(low_a + " ") or low_b.startswith(low_a + "-"):
                substring_pairs.append((name_a, name_b))
    # Build merge groups
    merge_groups = []
    # Exact case duplicates
    for normalized, names in groups.items():
        if len(names) > 1:
            merge_groups.append(names)
    # Substring matches (merge into existing groups or create new).
    # NOTE(review): a pair joins the FIRST group containing either name, so
    # two distinct groups sharing a member are not unified transitively —
    # confirm whether that is intended.
    for short, long in substring_pairs:
        found = False
        for group in merge_groups:
            if short in group or long in group:
                if short not in group:
                    group.append(short)
                if long not in group:
                    group.append(long)
                found = True
                break
        if not found:
            merge_groups.append([short, long])
    return merge_groups
def pick_canonical(names: list, entities: dict) -> str:
    """Pick the most detailed entity name as canonical.

    Preference order: names containing uppercase letters, then entries
    with more fields, then longer names.
    """
    def rank(candidate):
        entry = entities.get(candidate, {})
        field_count = len(entry) if isinstance(entry, dict) else 0
        has_upper = int(any(ch.isupper() for ch in candidate))
        return (has_upper, field_count, len(candidate))

    return max(names, key=rank)
def deduplicate(entities: dict, relationships: dict, dry_run: bool = False) -> tuple:
    """Deduplicate entities and update relationships.

    Merges each duplicate group into a canonical entry (see pick_canonical),
    records the dropped names as aliases, and rewrites relationship
    endpoints to the canonical names, merging collided relationship keys.
    Returns the (entities, relationships) pair.
    """
    groups = find_duplicates(entities)
    if not groups:
        log.info("No duplicates found.")
        return entities, relationships
    log.info(f"Found {len(groups)} duplicate groups:")
    alias_map = {}  # old_name → canonical_name
    for group in groups:
        canonical = pick_canonical(group, entities)
        aliases = [n for n in group if n != canonical]
        if not aliases:
            continue
        log.info(f" Canonical: '{canonical}' ← aliases: {aliases}")
        for alias in aliases:
            alias_map[alias] = canonical
        if not dry_run:
            # Merge fields into canonical
            canonical_entry = entities.get(canonical, {})
            if not isinstance(canonical_entry, dict):
                canonical_entry = {}
            existing_aliases = canonical_entry.get("aliases", [])
            for alias in aliases:
                if alias not in existing_aliases:
                    existing_aliases.append(alias)
                alias_entry = entities.get(alias, {})
                if isinstance(alias_entry, dict):
                    # Merge non-existing fields: canonical values win; the
                    # alias's own type/aliases are never copied over.
                    for k, v in alias_entry.items():
                        if k not in canonical_entry and k not in ("type", "aliases"):
                            canonical_entry[k] = v
            canonical_entry["aliases"] = existing_aliases
            entities[canonical] = canonical_entry
            # Remove aliases from entities
            for alias in aliases:
                if alias in entities:
                    del entities[alias]
    # Update relationships
    if not dry_run and alias_map:
        updated_rels = {}
        remapped = 0
        for key, rel in relationships.items():
            a = rel.get("a", "")
            b = rel.get("b", "")
            new_a = alias_map.get(a, a)
            new_b = alias_map.get(b, b)
            if new_a != a or new_b != b:
                remapped += 1
                rel["a"] = new_a
                rel["b"] = new_b
            new_key = f"{new_a}::{new_b}"
            if new_key in updated_rels:
                # Merge: sum counts, keep latest last_seen.
                # ISO-8601 timestamp strings compare correctly lexicographically.
                existing = updated_rels[new_key]
                existing["count"] = existing.get("count", 0) + rel.get("count", 0)
                if rel.get("last_seen", "") > existing.get("last_seen", ""):
                    existing["last_seen"] = rel["last_seen"]
                # NOTE(review): if the kept record lacks 'first_seen' ("" default),
                # this comparison never fires and first_seen stays missing — confirm.
                if rel.get("first_seen", "") < existing.get("first_seen", ""):
                    existing["first_seen"] = rel["first_seen"]
                # Merge types
                existing_types = set(existing.get("types", []))
                existing_types.update(rel.get("types", []))
                existing["types"] = list(existing_types)
            else:
                updated_rels[new_key] = rel
        log.info(f"Remapped {remapped} relationships, merged {len(relationships) - len(updated_rels)} duplicates")
        relationships = updated_rels
    log.info(f"Merged {len(alias_map)} aliases into {len(set(alias_map.values()))} canonical entities")
    return entities, relationships
# ─── Task 3: Relationship Scoring ────────────────────────────────────────────
def score_relationships(relationships: dict, dry_run: bool = False) -> dict:
    """Add strength scores to relationships and prune the weakest.

    strength = 0.4 * count_score + 0.3 * diversity_score + 0.3 * recency_score
      - count_score: log-scaled occurrence count, saturating near count=99
      - diversity_score: 0.3 per distinct relationship type, capped at 1.0
      - recency_score: 1.0 within 30 days, then linear decay over ~180 days

    Relationships scoring below 0.1 are removed (unless dry_run).
    Returns the (possibly mutated) relationships dict.

    Fixes vs. previous version: `import math` was executed inside the loop
    (hoisted here); unused locals `decay_threshold` and `first_seen_str` removed.
    """
    import math  # local import kept to match the module's lazy-import style

    now = datetime.now()
    removed = 0
    scored = 0
    decayed = 0
    to_remove = []
    for key, rel in relationships.items():
        count = rel.get("count", 1)
        last_seen_str = rel.get("last_seen", "")
        types = rel.get("types", [])
        # Base strength from count (log scale, capped at 1)
        count_score = min(1.0, math.log(count + 1) / math.log(100))
        # Context diversity: more relationship types = stronger
        diversity_score = min(1.0, len(types) * 0.3)
        # Recency score: full strength for 30 days, then linear decay.
        recency_score = 1.0
        if last_seen_str:
            try:
                last_seen = datetime.fromisoformat(last_seen_str)
                # TypeError is caught below if last_seen is tz-aware while
                # `now` is naive.
                days_ago = (now - last_seen).days
                if days_ago > 30:
                    recency_score = max(0.0, 1.0 - (days_ago - 30) / 180)
                    decayed += 1
            except (ValueError, TypeError):
                pass  # unparsable timestamp: treat as fully recent
        # Combined strength
        strength = round(
            count_score * 0.4 + diversity_score * 0.3 + recency_score * 0.3,
            3
        )
        if strength < 0.1:
            to_remove.append(key)
            removed += 1
        else:
            if not dry_run:
                rel["strength"] = strength
            scored += 1
    if not dry_run:
        for key in to_remove:
            del relationships[key]
    log.info(f"Scored {scored} relationships, decayed {decayed}, removed {removed} (strength < 0.1)")
    return relationships
# ─── Main ────────────────────────────────────────────────────────────────────
def main():
    """CLI entry point: classify, dedupe and score the knowledge graph files."""
    parser = argparse.ArgumentParser(description="Knowledge graph cleanup")
    parser.add_argument("--classify", action="store_true", help="Classify unknown entities")
    parser.add_argument("--dedupe", action="store_true", help="Deduplicate entities")
    parser.add_argument("--score", action="store_true", help="Score relationships")
    parser.add_argument("--dry-run", action="store_true", help="Show changes without writing")
    args = parser.parse_args()

    # When no step flag is given at all, every step runs.
    any_step_requested = args.classify or args.dedupe or args.score
    run_all = not any_step_requested

    entities = load_entities()
    relationships = load_relationships()
    log.info(f"Loaded {len(entities)} entities, {len(relationships)} relationships")

    if args.dry_run:
        log.info("═══ DRY RUN — no files will be modified ═══")
    else:
        # Snapshot both files before touching anything.
        backup(ENTITIES_PATH)
        backup(RELATIONSHIPS_PATH)

    if run_all or args.classify:
        log.info("─── Step 1: Classify Unknowns ───")
        entities = classify_unknowns(entities, dry_run=args.dry_run)
    if run_all or args.dedupe:
        log.info("─── Step 2: Deduplicate Entities ───")
        entities, relationships = deduplicate(entities, relationships, dry_run=args.dry_run)
    if run_all or args.score:
        log.info("─── Step 3: Score Relationships ───")
        relationships = score_relationships(relationships, dry_run=args.dry_run)

    if args.dry_run:
        log.info(f"Dry run complete. Would result in: {len(entities)} entities, {len(relationships)} relationships")
    else:
        atomic_write(ENTITIES_PATH, entities)
        atomic_write(RELATIONSHIPS_PATH, relationships)
        log.info(f"Done. Final: {len(entities)} entities, {len(relationships)} relationships")
if __name__ == "__main__":
    # Configure root logging once, then hand off to the CLI.
    logging.basicConfig(
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
        level=logging.INFO,
    )
    main()

View file

@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
LLM-Powered Entity Extractor — Uses Ollama for Named Entity Recognition.
Standalone module. No pip dependencies beyond stdlib.
Calls Ollama HTTP API with structured NER prompts.
Configuration via environment variables:
    DARKPLEX_OLLAMA_URL      Ollama base URL (default: http://localhost:11434)
    DARKPLEX_OLLAMA_MODEL    Model name (default: llama3.2:1b)
    DARKPLEX_OLLAMA_TIMEOUT  Timeout in seconds (default: 30)
    DARKPLEX_EXTRACTOR       llm|regex|auto (default: auto)
"""
import json
import logging
import os
import urllib.request
import urllib.error

log = logging.getLogger("llm-extractor")

# Ollama connection settings, each overridable via environment.
OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "llama3.2:1b")
OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30"))

# Entity types accepted as-is; anything else is mapped through the alias
# table in _normalize_entities or defaults to "concept".
VALID_TYPES = {"person", "organization", "company", "project", "technology",
               "location", "event", "concept", "product"}

# Single-text NER prompt; {text} is substituted at call time.
NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object.
Each key is the entity name (lowercase), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words (the, system, message, etc.)
- Entity names should be lowercase, use hyphens for multi-word
- "context" is a 2-5 word description of the entity's role in the text
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no explanation
Text:
{text}
JSON:"""

# Multi-text NER prompt; {texts} is a numbered list of inputs.
BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object.
Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words
- "context" is a 2-5 word description
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no markdown, no explanation
Texts:
{texts}
JSON:"""
def _call_ollama(prompt: str) -> str | None:
    """POST *prompt* to the Ollama generate endpoint.

    Returns the model's response text, or None on any failure so the
    caller can fall back to regex extraction.
    """
    body = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 1024},
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=OLLAMA_TIMEOUT) as resp:
            parsed = json.loads(resp.read().decode())
            return parsed.get("response", "")
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        log.warning(f"Ollama call failed: {e}")
        return None
    except Exception as e:
        log.warning(f"Ollama unexpected error: {e}")
        return None
def _parse_json_response(text: str) -> dict:
    """Pull a JSON object out of an LLM reply.

    Tolerates markdown code fences and surrounding prose: locates the
    first balanced {...} span and parses it. Returns {} on any failure.
    """
    if not text:
        return {}
    cleaned = text.strip()
    # Drop markdown fence lines (```json ... ```), keeping the payload.
    if cleaned.startswith("```"):
        kept = [ln for ln in cleaned.split("\n") if not ln.strip().startswith("```")]
        cleaned = "\n".join(kept)
    opening = cleaned.find("{")
    if opening == -1:
        return {}
    # Walk forward tracking brace depth until the opener is balanced.
    depth = 0
    for pos in range(opening, len(cleaned)):
        ch = cleaned[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                try:
                    return json.loads(cleaned[opening:pos + 1])
                except json.JSONDecodeError:
                    return {}
    return {}
def _normalize_entities(raw: dict) -> dict:
    """Normalize and validate extracted entities.

    Lowercases/hyphenates names, drops malformed or out-of-range entries,
    maps unknown type labels through a small alias table (falling back to
    "concept"), and truncates context strings to 100 chars.

    Returns:
        dict of {name: {"type": ..., "context": ..., "match": "llm"}}.
    """
    # Alias table hoisted out of the loop — the original rebuilt this dict
    # for every entity with an unrecognized type.
    type_aliases = {"org": "organization", "tech": "technology", "loc": "location",
                    "place": "location", "tool": "technology", "framework": "technology",
                    "language": "technology", "app": "product", "software": "product",
                    "service": "product", "group": "organization", "team": "organization"}
    result = {}
    for name, info in raw.items():
        if not isinstance(info, dict):
            continue
        name = name.strip().lower().replace("_", "-").replace(" ", "-")
        if len(name) < 2 or len(name) > 80:
            continue
        etype = info.get("type", "unknown").lower().strip()
        if etype not in VALID_TYPES:
            etype = type_aliases.get(etype, "concept")
        context = info.get("context", "")
        # Only string contexts are kept; anything else becomes "".
        context = context[:100] if isinstance(context, str) else ""
        result[name] = {"type": etype, "context": context, "match": "llm"}
    return result
def extract_entities_llm(text: str) -> dict[str, dict] | None:
    """Extract entities from *text* via the Ollama LLM.

    Returns {name: {type, context, match}} on success, {} for trivial
    input, or None when Ollama is unreachable — None tells the caller
    to fall back to regex extraction.
    """
    if not text or len(text) < 10:
        return {}
    # Cap prompt size so small local models stay fast.
    response = _call_ollama(NER_PROMPT.format(text=text[:2000]))
    if response is None:
        return None  # Signal fallback
    return _normalize_entities(_parse_json_response(response))
def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None:
    """Extract entities from several texts with a single LLM call.

    Texts under 10 chars are dropped, each kept text is capped at 500
    chars, and at most 10 texts go into one prompt. Returns the combined
    entity dict, {} when nothing qualifies, or None when the LLM is
    unavailable.
    """
    if not texts:
        return {}
    usable = [t[:500] for t in texts if t and len(t) >= 10]
    if not usable:
        return {}
    # Limit batch size to keep the prompt reasonable.
    usable = usable[:10]
    listing = "\n".join(f"[{i+1}] {t}" for i, t in enumerate(usable))
    response = _call_ollama(BATCH_PROMPT.format(texts=listing))
    if response is None:
        return None
    return _normalize_entities(_parse_json_response(response))
def is_available() -> bool:
    """Return True when the Ollama HTTP API answers /api/tags."""
    probe = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET")
    try:
        with urllib.request.urlopen(probe, timeout=3) as resp:
            return resp.status == 200
    except Exception:
        # Any network/HTTP failure means "not available".
        return False

830
cortex/intelligence/loop.py Normal file
View file

@ -0,0 +1,830 @@
#!/usr/bin/env python3
"""
Darkplex Loop The single heartbeat of the intelligence pipeline.
One process. One loop. One state machine.
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
Each cycle:
1. INGEST Fetch new events from NATS (batch consumer pull)
2. EXTRACT Pull entities and relationships from events
3. BRIDGE Sync cortex outputs to knowledge engine
4. VERIFY Check that real output was produced
5. REPORT Update state, alert on failure
States:
RUNNING Everything nominal
DEGRADED A step failed, but loop continues with recovery attempts
EMERGENCY Critical failure, alerting
Usage:
darkplex loop # Run loop (default: 1h cycle)
darkplex loop --once # Single cycle, then exit
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
darkplex loop --status # Print current state and exit
darkplex loop --check # Check for new events, exit 0=new 1=none
"""
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
import urllib.request
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
# ── Paths (configurable via env) ─────────────────────────────────────────────
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
SCRIPT_DIR = BASE_DIR / "scripts"
LEVEL4_DIR = SCRIPT_DIR / "level4"
LOG_DIR = BASE_DIR / "logs"
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"
# Knowledge-graph output files shared with the entity manager.
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
# NATS stream/consumer used by the ingest step.
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))
DEFAULT_CYCLE_SECONDS = 3600  # 1 hour
ALERT_COOLDOWN = 3600  # 1 alert per hour max
log = logging.getLogger("darkplex-loop")
# ── State Machine ────────────────────────────────────────────────────────────
class LoopState:
    """Persistent state for the Darkplex Loop.

    Every public attribute is serialized verbatim to STATE_FILE by save()
    and restored best-effort on construction.
    """
    def __init__(self):
        self.status = "INIT"          # INIT | RUNNING | DEGRADED | EMERGENCY
        self.cycle_count = 0
        self.last_cycle = None        # ISO timestamp of the last cycle (success or failure)
        self.last_success = None
        self.last_failure = None
        self.last_alert = None        # ISO timestamp of the last alert sent
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}               # per-step results from the last successful cycle
        self.error = None             # "step: message" from the last failure
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self.quality_metrics = {}  # {unknown_rate, llm_success_rate, avg_entities_per_event}
        self.quality_history = []  # last 10: [{cycle, unknown_rate, llm_success_rate}]
        self.ollama_status = "unknown"  # healthy|degraded|down
        self._load()

    def _load(self):
        # Restore previously-saved attributes; keys we don't know are ignored.
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                if hasattr(self, k):
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            pass  # first run or corrupt state file — start fresh

    def save(self):
        # Persist the full attribute dict as pretty-printed JSON.
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle."""
        self.perf = perf
        # Include unknown_rate in perf_history if available
        # (note: this mutates the caller's dict).
        if self.quality_metrics:
            perf["unknown_rate"] = self.quality_metrics.get("unknown_rate", 0)
        self.perf_history.append(perf)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over last 10 cycles."""
        if not self.perf_history:
            return {}
        # NOTE(review): keys come from the oldest entry, so metrics that
        # only appear in later entries are not averaged until they roll in.
        keys = self.perf_history[0].keys()
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / len(self.perf_history)) for k in keys}

    def record_success(self, step_results: dict):
        # A success resets the failure streak and stamps both cycle markers.
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        # Escalate to EMERGENCY after 3 consecutive failures.
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """True when no alert was sent within the last ALERT_COOLDOWN seconds."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            return True  # unparsable timestamp — err on the side of alerting

    def mark_alerted(self):
        # Stamp the cooldown clock.
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
# ── Pipeline Steps ───────────────────────────────────────────────────────────
def _nats_cmd():
    """Base NATS CLI command, honoring NATS_BIN and NATS_URL env vars."""
    binary = os.environ.get("NATS_BIN", "nats")
    url = os.environ.get("NATS_URL", "")
    return [binary, "-s", url] if url else [binary]
def check_new_events() -> int:
    """Number of pending events on the loop's NATS consumer, or -1 on error."""
    cmd = _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return -1
        return json.loads(proc.stdout).get("num_pending", 0)
    except Exception as e:
        # CLI missing, timeout, or bad JSON — all map to "unknown".
        log.warning(f"check_new_events failed: {e}")
        return -1
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns {"events": [...], "total_scanned": int, "skipped": int}; when
    the consumer has nothing pending a "skip_reason" key is added so
    run_cycle() can short-circuit. Falls back to _step_ingest_sequential()
    when the batch pull fails or times out.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    # Check how many pending
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")
    events = []
    total_fetched = 0
    parse_errors = 0
    # Fetch in batches; pending == -1 (check failed) still tries a full batch.
    remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        batch_size = min(remaining, NATS_BATCH_SIZE)
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)
        # --raw output: one JSON payload per line.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1
    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)
    # Update sequence tracking (get current stream seq from consumer info)
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        log.warning("Could not save last processed sequence")
    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Replays the stream from the last processed sequence (bounded to the
    most recent NATS_BATCH_SIZE messages), keeping only
    conversation_message_in events. Payloads arrive base64-encoded inside
    the stream-get JSON envelope.
    """
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        pass  # missing/corrupt checkpoint — replay from the window start
    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}
    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    # Never scan more than one batch worth of backlog.
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)
    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1  # any per-message failure just skips that sequence
    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass  # best-effort checkpoint
    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Dynamically loads entity-manager.py from LEVEL4_DIR (its file name has
    a hyphen, so importlib is used), prefers LLM (Ollama) extraction with
    regex fallback, writes the updated entity/relationship files, and
    records totals on *state*.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")
    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)
    # Try LLM batch extraction first
    from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")
    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    # Prepare texts for potential batch LLM processing
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        # Payload text may be a list of parts (dicts or strings) — flatten it.
        if isinstance(text, list):
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))
    # LLM batch extraction for qualifying texts (cap at 50 to keep cycle time reasonable)
    llm_results = {}
    if llm_ok:
        batch_texts = [t for t, s in sorted(
            [(t, s) for t, s in event_texts if t and s >= 0.4],
            key=lambda x: -x[1]  # highest importance first
        )][:50]
        if batch_texts:
            consecutive_fails = 0
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
            if llm_results:
                log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")
    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        # Only texts that cleared the importance threshold are processed.
        if not text or score < 0.4:
            continue
        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Add LLM entities that appear in this text
            text_lower = text.lower()
            for name, info in llm_results.items():
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                known[name] = entities[name]
        if len(names) >= 2:
            # Record co-occurrence pairs within a window of 4 neighbors
            # to bound the O(n²) pairing cost.
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1
    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)
    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
def step_bridge(state: LoopState) -> dict:
    """Step 3: invoke knowledge-bridge.py to sync cortex outputs.

    Returns a status dict: skipped (script absent), failed (non-zero
    exit), or ok with the item count parsed from the bridge's stdout.
    """
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")
    script = SCRIPT_DIR / "knowledge-bridge.py"
    if not script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}
    proc = subprocess.run(
        [sys.executable, str(script), "sync"],
        capture_output=True, text=True, timeout=120,
    )
    if proc.returncode != 0:
        log.warning(f"BRIDGE: Failed — {proc.stderr[:200]}")
        return {"status": "failed", "error": proc.stderr[:200]}
    # Sum every "<N> new/bridged/added" figure the bridge printed.
    bridged = sum(
        int(match.group(1))
        for line in proc.stdout.split("\n")
        if (match := re.search(r"(\d+)\s+(?:new|bridged|added)", line, re.I))
    )
    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}
def _check_quality(state: LoopState, extract_result: dict) -> list:
    """Check entity quality metrics. Returns list of issues/warnings.

    Updates state.quality_metrics and appends to state.quality_history
    (last 10 cycles). Flags an unknown-type rate above 30% and a
    three-cycle rising trend.
    """
    issues = []
    # Load entities and compute unknown_rate
    try:
        entities = json.loads(ENTITIES_FILE.read_text()) if ENTITIES_FILE.exists() else {}
    except (json.JSONDecodeError, OSError):
        entities = {}
    total = len(entities)
    unknown_count = sum(1 for e in entities.values() if e.get("type") == "unknown")
    unknown_rate = (unknown_count / total * 100) if total > 0 else 0.0
    events_processed = state.events_processed_last or 1
    extracted = extract_result.get("extracted", 0)
    avg_entities_per_event = extracted / events_processed if events_processed > 0 else 0.0
    # Estimate LLM success rate from extraction (if LLM was used, new_entities > 0 is a proxy)
    llm_success_rate = 100.0  # default if no LLM used
    # We track this per-cycle based on whether extraction produced results
    if events_processed > 10 and extracted == 0:
        llm_success_rate = 0.0
    state.quality_metrics = {
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
        "avg_entities_per_event": round(avg_entities_per_event, 2),
    }
    if unknown_rate > 30:
        issues.append(f"High unknown entity rate: {unknown_rate:.1f}% ({unknown_count}/{total})")
    # Track quality history and detect trends
    state.quality_history.append({
        "cycle": state.cycle_count + 1,
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
    })
    state.quality_history = state.quality_history[-10:]  # keep last 10
    # Check if unknown_rate rising 3 cycles in a row
    if len(state.quality_history) >= 3:
        last3 = [h["unknown_rate"] for h in state.quality_history[-3:]]
        if last3[0] < last3[1] < last3[2]:
            issues.append(f"Entity quality degrading — unknown_rate rising: {last3}")
    log.info(f"VERIFY/QUALITY: unknown_rate={unknown_rate:.1f}%, avg_entities/event={avg_entities_per_event:.2f}")
    return issues
def _check_ollama(state: LoopState) -> list:
    """Check Ollama health via /api/tags. Returns list of issues.

    Sets state.ollama_status to healthy/degraded/down; "degraded" means
    the API answers but the configured model is not loaded.
    """
    issues = []
    model = os.environ.get("DARKPLEX_OLLAMA_MODEL", os.environ.get("OLLAMA_MODEL", ""))
    # Honor DARKPLEX_OLLAMA_URL like llm_extractor does (was hardcoded to localhost).
    base_url = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
    try:
        req = urllib.request.Request(f"{base_url}/api/tags", method="GET")
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = json.loads(resp.read())
            models = [m.get("name", "") for m in data.get("models", [])]
            if model and not any(model in m for m in models):
                state.ollama_status = "degraded"
                issues.append(f"Ollama up but model '{model}' not loaded (available: {models[:5]})")
                log.warning(f"VERIFY/OLLAMA: degraded — model '{model}' not in {models[:5]}")
            else:
                state.ollama_status = "healthy"
                log.info(f"VERIFY/OLLAMA: healthy ({len(models)} models)")
    except Exception as e:
        state.ollama_status = "down"
        issues.append(f"Ollama down: {e}")
        log.warning(f"VERIFY/OLLAMA: down — {e}")
    return issues
def _check_performance(state: LoopState) -> list:
    """Flag performance regressions against the rolling 10-cycle average."""
    issues = []
    # Need at least two cycles of history for a meaningful comparison.
    if len(state.perf_history) < 2:
        return issues
    current = state.perf
    rolling = state.perf_averages()
    curr_total = current.get("total_ms", 0)
    avg_total = rolling.get("total_ms", 0)
    # More than double the rolling average counts as a regression.
    if avg_total > 0 and curr_total > 2 * avg_total:
        issues.append(f"Performance regression detected: {curr_total}ms vs avg {avg_total}ms")
    # Extraction alone should stay under two minutes.
    extract_ms = current.get("extract_ms", 0)
    if extract_ms > 120000:
        issues.append(f"Extraction too slow: {extract_ms}ms (>2min)")
    if not issues:
        log.info(f"VERIFY/PERF: OK (total={curr_total}ms, avg={avg_total}ms)")
    else:
        for problem in issues:
            log.warning(f"VERIFY/PERF: {problem}")
    return issues
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: sanity-check pipeline outputs and infrastructure.

    Aggregates file-integrity, extraction-yield, NATS, quality, Ollama
    and performance checks into {"verdict": "PASS"|"FAIL", "issues": [...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")
    issues = []
    # Both knowledge files must exist, parse as JSON, and be non-empty.
    for path, label in ((ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")):
        if not path.exists():
            issues.append(f"{label} file missing")
            continue
        try:
            if not json.loads(path.read_text()):
                issues.append(f"{label} file is empty")
        except json.JSONDecodeError:
            issues.append(f"{label} file is corrupt JSON")
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")
    # NATS reachability.
    try:
        probe = subprocess.run(["nats", "stream", "ls", "--json"], capture_output=True, text=True, timeout=10)
        if probe.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")
    # Deeper monitoring: quality metrics, Ollama health, perf regressions.
    issues.extend(_check_quality(state, extract_result))
    issues.extend(_check_ollama(state))
    issues.extend(_check_performance(state))
    verdict = "PASS" if not issues else "FAIL"
    log.info(f"VERIFY: {verdict}{len(issues)} issues")
    for issue in issues:
        log.warning(f"{issue}")
    return {"verdict": verdict, "issues": issues}
def step_report(state: LoopState, verify_result: dict):
    """Step 5: fire an alert when the loop is DEGRADED or EMERGENCY.

    Rate-limited via state.can_alert(); also drops a flag file for
    external monitors and stamps the cooldown clock on state.
    """
    if state.status == "RUNNING":
        return
    if not state.can_alert():
        log.info("REPORT: Alert cooldown active, skipping")
        return
    severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
    alert_text = (
        f"Darkplex Loop {severity}\n"
        f"Consecutive failures: {state.consecutive_failures}\n"
        f"Error: {state.error}\n"
        f"Issues: {', '.join(verify_result.get('issues', []))}"
    )
    log.warning(f"REPORT: Sending alert — {state.status}")
    try:
        subprocess.run(
            ["python3", str(SCRIPT_DIR / "vera-alert.py"), alert_text],
            capture_output=True, text=True, timeout=15,
        )
    except Exception:
        pass  # best-effort: alerting must never crash the loop
    (LOG_DIR / "darkplex-loop-alert.flag").write_text(
        f"{datetime.now().isoformat()} {state.status}: {state.error}"
    )
    state.mark_alerted()
# ── Helpers ──────────────────────────────────────────────────────────────────
def _importance(text: str) -> float:
    """Heuristic importance score for an event text, clamped to [0, 1].

    Rewards length, capitalized words and business vocabulary; penalizes
    heartbeat/cron noise. Empty input scores 0.0.
    """
    if not text:
        return 0.0
    lowered = text.lower()
    score = 0.3
    # Longer texts are likelier to carry substance.
    for length_cutoff in (200, 500):
        if len(text) > length_cutoff:
            score += 0.1
    # Capitalized words hint at named entities.
    cap_words = len(re.findall(r"\b[A-Z][a-z]+\b", text))
    for cap_cutoff in (3, 8):
        if cap_words > cap_cutoff:
            score += 0.1
    # Operational noise: each matching pattern costs 0.3.
    for pattern in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
        if pattern.lower() in lowered:
            score -= 0.3
    # Business vocabulary: each keyword present adds 0.05.
    for keyword in ["meeting", "project", "company", "contract", "decision", "strategy",
                    "budget", "deadline", "milestone", "partnership", "investment", "revenue",
                    "client", "proposal", "agreement"]:
        if keyword in lowered:
            score += 0.05
    return max(0.0, min(1.0, score))
def print_status():
    """Print a human-readable summary of the loop state to stdout."""
    state = LoopState()

    def _count(path):
        # Best-effort: missing/corrupt files count as 0.
        try:
            return len(json.loads(path.read_text()))
        except Exception:
            return 0

    ent_count = _count(ENTITIES_FILE)
    rel_count = _count(RELATIONSHIPS_FILE)
    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "")
    summary = [
        f"{icon} Status: {state.status}",
        f"Cycles: {state.cycle_count}",
        f"Last cycle: {state.last_cycle or 'never'}",
        f"Last success: {state.last_success or 'never'}",
        f"Last failure: {state.last_failure or 'never'}",
        f"Failures: {state.consecutive_failures}",
        f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)",
        f"Relationships:{rel_count} total",
    ]
    for line in summary:
        print(line)
    if state.quality_metrics:
        qm = state.quality_metrics
        print(f"Quality: unknown_rate={qm.get('unknown_rate', '?')}% llm_success={qm.get('llm_success_rate', '?')}% avg_ent/event={qm.get('avg_entities_per_event', '?')}")
    print(f"Ollama: {state.ollama_status}")
    if state.perf:
        print(f"Last perf: {state.perf}")
    if state.error:
        print(f"Error: {state.error}")
# ── Main Loop ────────────────────────────────────────────────────────────────
def _ms_since(t0: float) -> int:
    """Whole milliseconds elapsed since monotonic timestamp *t0*."""
    elapsed = time.monotonic() - t0
    return int(elapsed * 1000)
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success.

    Order: ingest → extract → bridge → verify, timing each step into
    *perf*. A verify FAIL only counts as a cycle failure when its issues
    mention broken/missing/corrupt outputs; softer issues keep RUNNING.
    """
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()
    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}
        # Early skip if no new events
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True
        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract
        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge
        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Only hard integrity problems flip the cycle to failure.
        if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]):
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False
        state.record_success(step_results)
        avgs = state.perf_averages()
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status}{perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")
        # A good cycle clears any stale alert flag.
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()
        return True
    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Attribute the failure to the first step that produced no result.
        step_name = "unknown"
        for name in ["ingest", "extract", "bridge", "verify"]:
            if name not in step_results:
                step_name = name
                break
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
def main():
    """CLI entry point for `darkplex loop`.

    Flags: --status (print state and exit), --check (exit 0 if new events,
    1 if none, 2 on error), --once (single cycle), --cycle N (interval in
    seconds).
    """
    # Create the log directory BEFORE configuring logging: the original
    # called logging.basicConfig first, and logging.FileHandler raises
    # FileNotFoundError when LOG_DIR does not exist yet (first run).
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )
    args = sys.argv[1:]
    if "--status" in args:
        print_status()
        return
    if "--check" in args:
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)
    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS
    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])
    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")
    running = True

    def handle_signal(sig, frame):
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    while running:
        run_cycle(state)
        if once:
            break
        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1s slices so a shutdown signal is honored promptly.
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)
    log.info("Darkplex Loop stopped")

View file

@ -0,0 +1,152 @@
"""Cross-Agent Memory Bus: NATS pub/sub for agent insights.
Agents publish insights (observations, learned facts, warnings) to the bus.
Other agents subscribe to topics relevant to their function.
DATA ISOLATION: Only Vainplex-internal agents participate.
"""
from __future__ import annotations
import json
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Callable, Awaitable
logger = logging.getLogger(__name__)
NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222")
# Only these agents are allowed to participate in shared memory
ALLOWED_AGENTS: set[str] = set(
os.environ.get("INTELLIGENCE_ALLOWED_AGENTS", "claudia,vera,stella,viola").split(",")
)
INSIGHT_SUBJECT_PREFIX = "darkplex.intelligence.insights"
@dataclass
class Insight:
    """A single piece of knowledge one agent shares with the others.

    Carries the publishing agent, a routing topic, the insight text,
    a confidence score in [0, 1], free-form tags, an ISO-8601 timestamp
    (auto-filled at creation when left empty), and arbitrary metadata.
    """
    agent: str
    topic: str
    content: str
    confidence: float = 0.8  # 0.0-1.0
    tags: list[str] = field(default_factory=list)
    timestamp: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Stamp creation time (UTC) unless the caller supplied one.
        if not self.timestamp:
            self.timestamp = datetime.now(timezone.utc).isoformat()

    def to_json(self) -> str:
        """Serialize this insight to a JSON string."""
        payload = {
            "agent": self.agent,
            "topic": self.topic,
            "content": self.content,
            "confidence": self.confidence,
            "tags": self.tags,
            "timestamp": self.timestamp,
            "metadata": self.metadata,
        }
        return json.dumps(payload)

    @classmethod
    def from_json(cls, data: str) -> Insight:
        """Deserialize an insight from its JSON string form."""
        return cls(**json.loads(data))

InsightHandler = Callable[[Insight], Awaitable[None]]
class SharedMemory:
    """Cross-agent memory bus using NATS pub/sub.

    Usage:
        memory = SharedMemory(agent_name="claudia")
        await memory.connect()
        await memory.publish(Insight(agent="claudia", topic="infra", content="..."))
        await memory.subscribe("infra", handler)

    Enforces data isolation: only allowed agents can publish/subscribe.
    """

    def __init__(self, agent_name: str, nats_url: str | None = None) -> None:
        # Reject non-allowlisted agents up front — isolation is enforced at
        # construction, publish, and receive time.
        if agent_name not in ALLOWED_AGENTS:
            raise ValueError(
                f"Agent '{agent_name}' is not allowed in shared memory. "
                f"Allowed: {ALLOWED_AGENTS}"
            )
        self.agent_name = agent_name
        self.nats_url = nats_url or NATS_URL
        self._nats_client: Any = None          # live NATS connection (set by connect())
        self._subscriptions: list[Any] = []    # active subscription handles

    async def connect(self) -> None:
        """Connect to the NATS server.

        Raises whatever `nats.connect` raises after logging the failure.
        """
        try:
            import nats
            self._nats_client = await nats.connect(self.nats_url)
            logger.info("SharedMemory connected for agent '%s'", self.agent_name)
        except Exception:
            logger.exception("Failed to connect SharedMemory to NATS")
            raise

    async def publish(self, insight: Insight) -> None:
        """Publish an insight to the memory bus.

        Args:
            insight: The insight to share. Agent field must match this instance's agent.

        Raises:
            RuntimeError: if connect() has not been called.
            ValueError: if the insight's agent is not allowlisted.
        """
        if not self._nats_client:
            raise RuntimeError("Not connected. Call connect() first.")
        if insight.agent not in ALLOWED_AGENTS:
            raise ValueError(f"Agent '{insight.agent}' not allowed to publish insights")
        # Route by topic: darkplex.intelligence.insights.<topic>
        subject = f"{INSIGHT_SUBJECT_PREFIX}.{insight.topic}"
        await self._nats_client.publish(subject, insight.to_json().encode())
        logger.debug(
            "Published insight: %s/%s by %s", insight.topic, insight.content[:50], insight.agent
        )

    async def subscribe(self, topic: str, handler: InsightHandler) -> None:
        """Subscribe to insights on a topic.

        Args:
            topic: Topic to subscribe to (supports NATS wildcards).
            handler: Async callback for received insights.
        """
        if not self._nats_client:
            raise RuntimeError("Not connected. Call connect() first.")
        subject = f"{INSIGHT_SUBJECT_PREFIX}.{topic}"

        async def _message_handler(msg: Any) -> None:
            # Validate the sender before dispatching; never let a handler
            # exception kill the subscription callback.
            try:
                insight = Insight.from_json(msg.data.decode())
                if insight.agent not in ALLOWED_AGENTS:
                    logger.warning(
                        "Ignoring insight from non-allowed agent: %s", insight.agent
                    )
                    return
                await handler(insight)
            except Exception:
                logger.exception("Error handling insight message")

        sub = await self._nats_client.subscribe(subject, cb=_message_handler)
        self._subscriptions.append(sub)
        logger.info("Subscribed to insights: %s", subject)

    async def close(self) -> None:
        """Unsubscribe and disconnect."""
        for sub in self._subscriptions:
            await sub.unsubscribe()
        self._subscriptions.clear()
        if self._nats_client:
            await self._nats_client.close()
            self._nats_client = None

View file

@ -0,0 +1,193 @@
"""Temporal Context API: chronological knowledge retrieval.
Queries NATS events and ChromaDB with a time dimension to answer:
"What do we know about X, chronologically?"
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
logger = logging.getLogger(__name__)
# Default config from environment
NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222")
CHROMADB_URL = os.environ.get("CHROMADB_URL", "http://localhost:8000")
@dataclass
class TemporalEntry:
    """A knowledge entry with temporal metadata."""
    timestamp: datetime                 # when the knowledge was recorded
    source: str  # "nats" or "chromadb"
    topic: str                          # query topic this entry matched
    content: str                        # raw event/document text
    metadata: dict[str, Any] = field(default_factory=dict)
    relevance_score: float = 0.0        # raw ChromaDB distance; 0.0 for NATS entries
@dataclass
class TemporalQuery:
    """Query parameters for temporal context retrieval."""
    topic: str                          # subject/semantic topic to look up
    start_time: datetime | None = None  # inclusive lower bound (None = unbounded)
    end_time: datetime | None = None    # inclusive upper bound (None = unbounded)
    limit: int = 50                     # max entries returned (applied per source and after merge)
    sources: list[str] = field(default_factory=lambda: ["nats", "chromadb"])
class TemporalContext:
    """Retrieves chronological knowledge from NATS events and ChromaDB.

    Usage:
        ctx = TemporalContext()
        entries = await ctx.query(TemporalQuery(topic="ssl-cert"))
    """

    def __init__(
        self,
        nats_url: str | None = None,
        chromadb_url: str | None = None,
    ) -> None:
        self.nats_url = nats_url or NATS_URL
        self.chromadb_url = chromadb_url or CHROMADB_URL
        self._nats_client: Any = None    # set by connect()
        self._chroma_client: Any = None  # set by connect()

    async def connect(self) -> None:
        """Initialize connections to NATS and ChromaDB.

        Each backend is attempted independently; a failure is logged and
        that source is simply skipped by query() (its client stays None).
        """
        try:
            import nats
            self._nats_client = await nats.connect(self.nats_url)
            logger.info("Connected to NATS: %s", self.nats_url)
        except Exception:
            logger.exception("Failed to connect to NATS")
        try:
            import chromadb
            self._chroma_client = chromadb.HttpClient(host=self.chromadb_url)
            logger.info("Connected to ChromaDB: %s", self.chromadb_url)
        except Exception:
            logger.exception("Failed to connect to ChromaDB")

    async def query(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query temporal context across configured sources.

        Returns entries sorted chronologically (oldest first).
        """
        entries: list[TemporalEntry] = []
        if "nats" in query.sources and self._nats_client:
            nats_entries = await self._query_nats(query)
            entries.extend(nats_entries)
        if "chromadb" in query.sources and self._chroma_client:
            chroma_entries = self._query_chromadb(query)
            entries.extend(chroma_entries)
        # Sort chronologically
        entries.sort(key=lambda e: e.timestamp)
        # Apply limit
        if query.limit:
            entries = entries[:query.limit]
        return entries

    async def _query_nats(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query NATS JetStream for historical events matching the topic."""
        entries: list[TemporalEntry] = []
        try:
            js = self._nats_client.jetstream()
            subject = f"darkplex.*.{query.topic}.>"
            # Get messages from the stream
            sub = await js.subscribe(subject, ordered_consumer=True)
            count = 0
            async for msg in sub.messages:
                if count >= query.limit:
                    break
                # NOTE(review): assumes a numeric "Nats-Time-Stamp" header in
                # epoch seconds; messages without it collapse to 1970 and are
                # then filtered by start_time — confirm header semantics.
                timestamp = datetime.fromtimestamp(
                    msg.headers.get("Nats-Time-Stamp", 0) if msg.headers else 0,
                    tz=timezone.utc,
                )
                if query.start_time and timestamp < query.start_time:
                    continue
                if query.end_time and timestamp > query.end_time:
                    continue
                entries.append(TemporalEntry(
                    timestamp=timestamp,
                    source="nats",
                    topic=query.topic,
                    content=msg.data.decode() if msg.data else "",
                    metadata={"subject": msg.subject},
                ))
                count += 1
        except Exception:
            logger.exception("NATS temporal query failed for topic: %s", query.topic)
        return entries

    def _query_chromadb(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query ChromaDB for semantically relevant entries with time filtering."""
        entries: list[TemporalEntry] = []
        try:
            collection = self._chroma_client.get_or_create_collection("darkplex_knowledge")
            # Build an optional metadata filter on the ISO-format timestamp field.
            where_filter: dict[str, Any] = {}
            if query.start_time:
                where_filter["timestamp"] = {"$gte": query.start_time.isoformat()}
            if query.end_time:
                if "timestamp" in where_filter:
                    # Both bounds present: combine via an explicit $and clause.
                    where_filter = {
                        "$and": [
                            {"timestamp": {"$gte": query.start_time.isoformat()}},
                            {"timestamp": {"$lte": query.end_time.isoformat()}},
                        ]
                    }
                else:
                    where_filter["timestamp"] = {"$lte": query.end_time.isoformat()}
            results = collection.query(
                query_texts=[query.topic],
                n_results=query.limit,
                where=where_filter if where_filter else None,
            )
            if results and results.get("documents"):
                for i, doc in enumerate(results["documents"][0]):
                    meta = results["metadatas"][0][i] if results.get("metadatas") else {}
                    ts_str = meta.get("timestamp", "")
                    try:
                        ts = datetime.fromisoformat(ts_str)
                    except (ValueError, TypeError):
                        # Missing/invalid timestamp metadata: fall back to "now".
                        ts = datetime.now(timezone.utc)
                    entries.append(TemporalEntry(
                        timestamp=ts,
                        source="chromadb",
                        topic=query.topic,
                        content=doc,
                        metadata=meta,
                        # NOTE(review): stores the raw distance (smaller = closer),
                        # not a similarity score — confirm this is intended.
                        relevance_score=results["distances"][0][i] if results.get("distances") else 0.0,
                    ))
        except Exception:
            logger.exception("ChromaDB temporal query failed for topic: %s", query.topic)
        return entries

    async def close(self) -> None:
        """Close connections."""
        if self._nats_client:
            await self._nats_client.close()

345
cortex/knowledge_extractor.py Executable file
View file

@ -0,0 +1,345 @@
#!/usr/bin/env python3
"""
Smart Extractor Extract entities from NATS events and update knowledge graph.
Part of Level 4.4 AGI Roadmap.

Usage:
    knowledge_extractor.py --last 100     Process last N events
    knowledge_extractor.py --since 6h     Process events from last 6 hours
    knowledge_extractor.py --dry-run      Show what would be extracted without saving
"""
import sys
import os
import json
import subprocess
import re
import time
import logging
from pathlib import Path
from datetime import datetime

# Make sibling modules (entity_manager.py) importable by path.
sys.path.insert(0, str(Path(__file__).parent))
from importlib import import_module  # NOTE(review): appears unused — verify before removing

SCRIPT_DIR = Path(__file__).parent
LOG_DIR = Path.home() / "clawd" / "logs"
LOG_FILE = LOG_DIR / "entity-extraction.log"
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
NATS_STREAM = "openclaw-events"
CONSUMER_NAME = "kg-extractor-temp"

# Setup logging (directory must exist before FileHandler opens the file)
LOG_DIR.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler(),
    ],
)
log = logging.getLogger("smart-extractor")
def load_json(path):
    """Read a JSON file, returning {} when missing or unparseable."""
    try:
        with open(path) as fh:
            return json.load(fh)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}
def save_json(path, data):
    """Write *data* as pretty-printed JSON, creating parent directories."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    with open(path, "w") as fh:
        fh.write(serialized)
def importance_heuristic(text):
    """Simple importance scoring (0-1) based on content heuristics."""
    if not text:
        return 0.0
    lowered = text.lower()
    score = 0.3  # base
    # Longer messages tend to carry more substance.
    if len(text) > 200:
        score += 0.1
    if len(text) > 500:
        score += 0.1
    # Capitalized words are a cheap proxy for named entities.
    cap_words = re.findall(r"\b[A-Z][a-z]+\b", text)
    if len(cap_words) > 3:
        score += 0.1
    if len(cap_words) > 8:
        score += 0.1
    # Heartbeat/cron chatter counts as noise.
    for noise in ("HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"):
        if noise.lower() in lowered:
            score -= 0.3
    # Business/project vocabulary nudges the score upward.
    for word in ("meeting", "project", "company", "contract", "decision",
                 "strategy", "budget", "deadline", "milestone", "partnership",
                 "investment", "revenue", "client", "proposal", "agreement"):
        if word in lowered:
            score += 0.05
    return max(0.0, min(1.0, score))
def fetch_events_nats(last=None, since=None):
    """Fetch conversation events from the NATS stream via the `nats` CLI.

    Samples the stream's sequence range with `nats stream get`, keeping only
    `conversation_message_in` payloads that decode as JSON dicts.

    Args:
        last: Approximate number of recent events to sample (default 500).
        since: Duration string ("6h", "1d", "30m"); estimates a start
            sequence from the stream's message rate and also time-filters
            the decoded events.

    Returns:
        List of decoded event payload dicts (possibly empty on any error).
    """
    # FIX: hoisted out of the per-message loop (was re-imported every
    # iteration); also dropped an unused local subject filter.
    import base64

    events = []
    try:
        # Get stream info for the available sequence range.
        info_result = subprocess.run(
            ["nats", "stream", "info", NATS_STREAM, "--json"],
            capture_output=True, text=True, timeout=10
        )
        if info_result.returncode != 0:
            log.error("Failed to get stream info")
            return events
        info = json.loads(info_result.stdout)
        end_seq = info["state"]["last_seq"]
        start_seq = info["state"]["first_seq"]
        # Calculate range
        count = last or 500
        if since:
            # Estimate the start sequence from the stream's observed
            # message rate (messages per millisecond since first_ts).
            ms_since = parse_since(since) * 1000
            total_ms = (time.time() * 1000) - (datetime.fromisoformat(info["state"]["first_ts"].replace("Z", "+00:00")).timestamp() * 1000)
            total_msgs = end_seq - start_seq
            msgs_per_ms = total_msgs / total_ms if total_ms > 0 else 1
            fetch_start = max(start_seq, int(end_seq - ms_since * msgs_per_ms * 1.2))
        else:
            fetch_start = max(start_seq, end_seq - count)
        log.info(f"Fetching sequences {fetch_start} - {end_seq}")
        # Sample at most ~count messages evenly across the range.
        step = max(1, (end_seq - fetch_start) // count)
        for seq in range(fetch_start, end_seq + 1, step):
            try:
                result = subprocess.run(
                    ["nats", "stream", "get", NATS_STREAM, str(seq), "--json"],
                    capture_output=True, text=True, timeout=5
                )
                if result.returncode != 0:
                    continue
                msg = json.loads(result.stdout)
                subj = msg.get("subject", "")
                # Only conversation messages carry extractable text.
                if "conversation_message_in" not in subj:
                    continue
                # Input validation: max size check (1MB)
                raw_data = msg.get("data", "")
                if len(raw_data) > 1_048_576:
                    log.warning("Skipping oversized message at seq %d (%d bytes)", seq, len(raw_data))
                    continue
                try:
                    decoded = base64.b64decode(raw_data)
                except Exception as e:
                    log.warning("Invalid base64 at seq %d: %s", seq, e)
                    continue
                try:
                    data = json.loads(decoded.decode("utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError) as e:
                    log.warning("Invalid JSON at seq %d: %s", seq, e)
                    continue
                if not isinstance(data, dict):
                    log.warning("Expected dict at seq %d, got %s", seq, type(data).__name__)
                    continue
                events.append(data)
            except Exception:
                continue
        log.info(f"Fetched {len(events)} conversation events")
    except subprocess.TimeoutExpired:
        log.warning("NATS command timed out")
    except FileNotFoundError:
        log.warning("nats CLI not found — skipping NATS extraction")
    # Filter by time if --since specified (event timestamps are epoch ms).
    if since and events:
        cutoff = parse_since(since)
        if cutoff:
            events = [e for e in events if e.get("timestamp", 0) / 1000 >= cutoff]
    return events
def parse_since(since_str):
    """Parse a duration like '6h', '1d', '30m' into an epoch timestamp.

    Returns the timestamp that far in the past, or None when the string
    does not start with <digits><h|d|m>.
    """
    match = re.match(r"(\d+)([hdm])", since_str)
    if match is None:
        return None
    amount = int(match.group(1))
    unit_seconds = {"h": 3600, "d": 86400, "m": 60}[match.group(2)]
    return time.time() - amount * unit_seconds
def extract_from_event(event, known_entities):
    """Extract entities from a single event.

    Returns (found_entities_dict, importance_score); extraction is skipped
    entirely when the score falls below 0.4.
    """
    em = sys.modules.get("entity_manager_mod")
    if not em:
        # Lazily load the sibling entity_manager.py under a private name.
        import importlib.util
        module_path = Path(__file__).parent / "entity_manager.py"
        spec = importlib.util.spec_from_file_location("entity_manager_mod", module_path)
        em = importlib.util.module_from_spec(spec)
        sys.modules["entity_manager_mod"] = em
        spec.loader.exec_module(em)
    payload = event.get("payload", {})
    text = payload.get("text_preview", "") or payload.get("text", "")
    # Normalize the preview into a single string.
    if isinstance(text, list):
        text = " ".join(str(part) for part in text)
    elif not isinstance(text, str):
        text = str(text)
    if not text:
        return {}, 0.0
    score = importance_heuristic(text)
    if score < 0.4:
        # Below the importance threshold: skip the expensive extraction.
        return {}, score
    return em.extract_entities(text, known_entities), score
def run_extraction(last=None, since=None, dry_run=False):
    """Main extraction pipeline.

    Loads the entity_manager module by path, fetches conversation events
    from NATS, extracts entities per event, records co-occurrence
    relationships between entities seen in the same message, and persists
    both stores (skipped when dry_run).

    Args:
        last: Process approximately the last N events.
        since: Duration string ("6h", "1d") limiting how far back to look.
        dry_run: When True, nothing is written to disk.
    """
    log.info(f"Starting extraction (last={last}, since={since}, dry_run={dry_run})")
    # Load known entities
    spec_path = Path(__file__).parent / "entity_manager.py"
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager_mod", spec_path)
    em = importlib.util.module_from_spec(spec)
    sys.modules["entity_manager_mod"] = em
    spec.loader.exec_module(em)
    known = em.load_known_entities()
    log.info(f"Loaded {len(known)} known entities")
    # Fetch events
    events = fetch_events_nats(last=last, since=since)
    log.info(f"Fetched {len(events)} events from NATS")
    if not events:
        log.info("No events to process")
        return
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    # One timestamp for the whole run keeps first_seen/last_seen consistent.
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    for event in events:
        found, score = extract_from_event(event, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        # Add new entities
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "nats-extraction",
                    "first_seen": ts_now,
                }
                new_entities += 1
                known[name] = entities[name]
        # Create co-occurrence relationships between entities found in same message
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):  # limit pairs
                    # Canonical key orders the pair alphabetically: "a::b".
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b,
                            "types": ["co-occurrence"],
                            "count": 1,
                            "first_seen": ts_now,
                            "last_seen": ts_now,
                        }
                        new_relationships += 1
        if not dry_run and total_extracted % 50 == 0 and total_extracted > 0:
            # Periodic save
            em.save_json(ENTITIES_FILE, entities)
            em.save_json(RELATIONSHIPS_FILE, relationships)
    if not dry_run:
        em.save_json(ENTITIES_FILE, entities)
        em.save_json(RELATIONSHIPS_FILE, relationships)
    log.info(
        f"Done: {len(events)} events processed, {total_extracted} entities extracted, "
        f"{new_entities} new entities, {new_relationships} new relationships"
    )
    print(
        f"\nResults: {len(events)} events → {total_extracted} entities extracted, "
        f"{new_entities} new, {new_relationships} new relationships"
    )
def main():
    """Parse CLI flags (--last N, --since DUR, --dry-run) and run extraction."""
    last = None
    since = None
    dry_run = False
    args = sys.argv[1:]
    idx = 0
    while idx < len(args):
        flag = args[idx]
        if flag == "--last" and idx + 1 < len(args):
            last = int(args[idx + 1])
            idx += 2
        elif flag == "--since" and idx + 1 < len(args):
            since = args[idx + 1]
            idx += 2
        elif flag == "--dry-run":
            dry_run = True
            idx += 1
        else:
            # Unknown or incomplete flag: show usage and bail out.
            print(__doc__)
            sys.exit(1)
    if last is None and since is None:
        last = 100  # default window
    run_extraction(last=last, since=since, dry_run=dry_run)

if __name__ == "__main__":
    main()

214
cortex/llm_extractor.py Normal file
View file

@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
LLM-Powered Entity Extractor Uses Ollama for Named Entity Recognition.
Standalone module. No pip dependencies beyond stdlib.
Calls Ollama HTTP API with structured NER prompts.
Configuration via environment variables:
DARKPLEX_OLLAMA_URL Ollama base URL (default: http://localhost:11434)
DARKPLEX_OLLAMA_MODEL Model name (default: mistral:7b)
DARKPLEX_OLLAMA_TIMEOUT Timeout in seconds (default: 10)
DARKPLEX_EXTRACTOR llm|regex|auto (default: auto)
"""
import json
import logging
import os
import urllib.request
import urllib.error
log = logging.getLogger("llm-extractor")
OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "mistral:7b")
OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30"))
VALID_TYPES = {"person", "organization", "company", "project", "technology",
"location", "event", "concept", "product"}
NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object.
Each key is the entity name (lowercase), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words (the, system, message, etc.)
- Entity names should be lowercase, use hyphens for multi-word
- "context" is a 2-5 word description of the entity's role in the text
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no explanation
Text:
{text}
JSON:"""
BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object.
Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words
- "context" is a 2-5 word description
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no markdown, no explanation
Texts:
{texts}
JSON:"""
def _call_ollama(prompt: str) -> str | None:
    """POST a prompt to Ollama's /api/generate endpoint.

    Returns the model's response text, or None on any connection,
    timeout, or parsing failure (logged as a warning).
    """
    body = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 1024},
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=OLLAMA_TIMEOUT) as resp:
            parsed = json.loads(resp.read().decode())
            return parsed.get("response", "")
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        log.warning(f"Ollama call failed: {e}")
        return None
    except Exception as e:
        log.warning(f"Ollama unexpected error: {e}")
        return None
def _parse_json_response(text: str) -> dict:
"""Extract JSON dict from LLM response, handling markdown fences etc."""
if not text:
return {}
# Strip markdown code fences
text = text.strip()
if text.startswith("```"):
lines = text.split("\n")
lines = [l for l in lines if not l.strip().startswith("```")]
text = "\n".join(lines)
# Find the JSON object
start = text.find("{")
if start == -1:
return {}
# Find matching closing brace
depth = 0
for i in range(start, len(text)):
if text[i] == "{":
depth += 1
elif text[i] == "}":
depth -= 1
if depth == 0:
try:
return json.loads(text[start:i + 1])
except json.JSONDecodeError:
return {}
return {}
def _normalize_entities(raw: dict) -> dict:
    """Normalize and validate extracted entities.

    Lowercases and hyphenates names, drops non-dict or badly-sized
    entries, maps common type aliases onto the canonical VALID_TYPES
    (unknown types become "concept"), and caps context at 100 chars.
    """
    # Common aliases the model emits, mapped onto canonical types.
    aliases = {"org": "organization", "tech": "technology", "loc": "location",
               "place": "location", "tool": "technology", "framework": "technology",
               "language": "technology", "app": "product", "software": "product",
               "service": "product", "group": "organization", "team": "organization"}
    normalized = {}
    for raw_name, info in raw.items():
        if not isinstance(info, dict):
            continue
        name = raw_name.strip().lower().replace("_", "-").replace(" ", "-")
        if not (2 <= len(name) <= 80):
            continue
        etype = info.get("type", "unknown").lower().strip()
        if etype not in VALID_TYPES:
            etype = aliases.get(etype, "concept")
        context = info.get("context", "")
        context = context[:100] if isinstance(context, str) else ""
        normalized[name] = {"type": etype, "context": context, "match": "llm"}
    return normalized
def extract_entities_llm(text: str) -> dict[str, dict] | None:
    """Run LLM NER over *text*.

    Returns {name: {type, context, match}} on success, {} for trivially
    short input, or None when Ollama is unavailable — the None signals
    the caller to fall back to regex extraction.
    """
    if not text or len(text) < 10:
        return {}
    # Keep the prompt bounded (context window / latency).
    prompt = NER_PROMPT.format(text=text[:2000])
    response = _call_ollama(prompt)
    if response is None:
        return None  # signal regex fallback
    return _normalize_entities(_parse_json_response(response))
def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None:
    """Run LLM NER over several texts with a single Ollama call.

    Returns the combined entity dict, {} when nothing usable was
    supplied, or None when Ollama is unavailable.
    """
    if not texts:
        return {}
    # Drop trivially short texts and cap each snippet at 500 chars.
    usable = [t[:500] for t in texts if t and len(t) >= 10]
    if not usable:
        return {}
    # Cap the batch so the prompt stays within a sane size.
    usable = usable[:10]
    numbered = "\n".join(f"[{i+1}] {snippet}" for i, snippet in enumerate(usable))
    response = _call_ollama(BATCH_PROMPT.format(texts=numbered))
    if response is None:
        return None
    return _normalize_entities(_parse_json_response(response))
def is_available() -> bool:
    """Return True when the Ollama HTTP API answers on /api/tags."""
    try:
        probe = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET")
        with urllib.request.urlopen(probe, timeout=3) as resp:
            return resp.status == 200
    except Exception:
        return False

701
cortex/loop.py Normal file
View file

@ -0,0 +1,701 @@
#!/usr/bin/env python3
"""
Darkplex Loop The single heartbeat of the intelligence pipeline.
One process. One loop. One state machine.
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
Each cycle:
1. INGEST Fetch new events from NATS (batch consumer pull)
2. EXTRACT Pull entities and relationships from events
3. BRIDGE Sync cortex outputs to knowledge engine
4. VERIFY Check that real output was produced
5. REPORT Update state, alert on failure
States:
RUNNING Everything nominal
DEGRADED A step failed, but loop continues with recovery attempts
EMERGENCY Critical failure, alerting
Usage:
darkplex loop # Run loop (default: 1h cycle)
darkplex loop --once # Single cycle, then exit
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
darkplex loop --status # Print current state and exit
darkplex loop --check # Check for new events, exit 0=new 1=none
"""
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
# ── Paths (configurable via env) ─────────────────────────────────────────────
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
SCRIPT_DIR = BASE_DIR / "scripts"
LEVEL4_DIR = SCRIPT_DIR / "level4"
LOG_DIR = BASE_DIR / "logs"
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))
DEFAULT_CYCLE_SECONDS = 3600 # 1 hour
ALERT_COOLDOWN = 3600 # 1 alert per hour max
log = logging.getLogger("darkplex-loop")
# ── State Machine ────────────────────────────────────────────────────────────
class LoopState:
    """Persistent state for the Darkplex Loop.

    Mirrors itself to STATE_FILE as JSON on every mutation so that status
    queries and process restarts see the latest cycle results.
    """

    def __init__(self):
        # Lifecycle: INIT -> RUNNING / DEGRADED / EMERGENCY.
        self.status = "INIT"
        self.cycle_count = 0
        self.last_cycle = None
        self.last_success = None
        self.last_failure = None
        self.last_alert = None
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}
        self.error = None
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self._load()

    def _load(self):
        # Restore previously persisted fields; unknown keys are ignored.
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                if hasattr(self, k):
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            pass

    def save(self):
        """Persist the full state dict to STATE_FILE as JSON."""
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle."""
        self.perf = perf
        self.perf_history.append(perf)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over last 10 cycles."""
        if not self.perf_history:
            return {}
        keys = self.perf_history[0].keys()
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / len(self.perf_history)) for k in keys}

    def record_success(self, step_results: dict):
        """Mark a successful cycle: reset the failure streak and persist."""
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        """Mark a failed cycle; escalate to EMERGENCY after 3 in a row."""
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """True when no alert has fired within ALERT_COOLDOWN seconds."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            return True

    def mark_alerted(self):
        """Record the alert time for cooldown bookkeeping."""
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
# ── Pipeline Steps ───────────────────────────────────────────────────────────
def _nats_cmd():
"""Build NATS CLI base command with auth."""
nats_bin = os.environ.get("NATS_BIN", "nats")
nats_url = os.environ.get("NATS_URL", "")
if nats_url:
return [nats_bin, "-s", nats_url]
return [nats_bin]
def check_new_events() -> int:
    """Return the number of pending events in the consumer.

    0 means nothing new; -1 means the check itself failed.
    """
    try:
        proc = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if proc.returncode != 0:
            return -1
        return json.loads(proc.stdout).get("num_pending", 0)
    except Exception as e:
        log.warning(f"check_new_events failed: {e}")
        return -1
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns a dict with "events" (decoded payloads), "total_scanned",
    "skipped", and optionally "skip_reason". Falls back to the slower
    sequential fetch when the batch pull fails or times out.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    # Check how many pending
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")
    events = []
    total_fetched = 0
    parse_errors = 0
    # Fetch in batches (pending < 0 means "unknown": try a full batch anyway)
    remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        batch_size = min(remaining, NATS_BATCH_SIZE)
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)
        # --raw output: one message payload per line, each expected to be JSON.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1
    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)
    # Update sequence tracking (get current stream seq from consumer info)
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        log.warning("Could not save last processed sequence")
    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Reads one message per `nats stream get` call, starting just after the
    last persisted sequence (bounded by NATS_BATCH_SIZE), and keeps only
    conversation_message_in payloads that decode cleanly.
    """
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        pass
    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}
    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)
    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            # Stream payloads are base64-encoded JSON.
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1
    # Persist progress even when some messages failed to decode.
    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass
    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Prefers the LLM extractor (Ollama) for high-importance texts; falls back
    to the regex extractor after repeated LLM failures or when disabled via
    DARKPLEX_EXTRACTOR. Persists entities/relationships to the knowledge
    files and updates the loop state counters.

    Returns a dict: {"extracted": int, "new_entities": int, "new_relationships": int}.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")
    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    # entity-manager.py has a dash in its filename, so it must be loaded via
    # importlib rather than a normal import.
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)
    # FIX: the darkplex-core merge moved llm_extractor under
    # cortex.intelligence — prefer the packaged import; keep the bare import
    # as a fallback for the legacy LEVEL4_DIR layout added to sys.path above.
    try:
        from cortex.intelligence.llm_extractor import extract_entities_llm_batch, is_available as llm_available
    except ImportError:
        from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")
    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    # Normalize each event payload to (text, importance) up front so the LLM
    # batch and the per-event loop below see identical inputs.
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        if isinstance(text, list):
            # Some payloads carry a list of content parts; flatten to a string.
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))
    # LLM batch extraction for qualifying texts (importance >= 0.4), in
    # chunks of 10, with a circuit breaker after 3 consecutive failures.
    llm_results = {}
    if llm_ok:
        batch_texts = [t for t, s in event_texts if t and s >= 0.4]
        if batch_texts:
            consecutive_fails = 0
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
            if llm_results:
                log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")
    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        if not text or score < 0.4:
            continue
        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Attribute LLM entities back to the texts they appear in, trying
            # a few hyphen/space variants of the normalized name.
            text_lower = text.lower()
            for name, info in llm_results.items():
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                known[name] = entities[name]
        # Co-occurrence relationships between entities in the same event,
        # capped at 4 partners per entity to bound quadratic blowup.
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1
    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)
    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
def step_bridge(state: LoopState) -> dict:
    """Step 3: Run knowledge bridge."""
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")
    script = SCRIPT_DIR / "knowledge-bridge.py"
    if not script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}
    proc = subprocess.run(
        [sys.executable, str(script), "sync"],
        capture_output=True, text=True, timeout=120,
    )
    if proc.returncode != 0:
        log.warning(f"BRIDGE: Failed — {proc.stderr[:200]}")
        return {"status": "failed", "error": proc.stderr[:200]}
    # Sum every "N new/bridged/added" count the bridge script reported.
    matches = (re.search(r"(\d+)\s+(?:new|bridged|added)", ln, re.I)
               for ln in proc.stdout.split("\n"))
    bridged = sum(int(m.group(1)) for m in matches if m)
    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: Verify output quality.

    Checks the knowledge files exist, parse, and are non-empty; flags a
    probable extraction breakage (many events, zero entities); and probes
    NATS reachability. Returns {"verdict": "PASS"|"FAIL", "issues": [...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")
    issues = []
    for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]:
        if not f.exists():
            issues.append(f"{label} file missing")
        else:
            try:
                data = json.loads(f.read_text())
                if not data:
                    issues.append(f"{label} file is empty")
            except json.JSONDecodeError:
                issues.append(f"{label} file is corrupt JSON")
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")
    try:
        # FIX: use _nats_cmd() like every other NATS invocation in this module
        # so the reachability probe targets the same server/context instead of
        # the bare `nats` default.
        r = subprocess.run(_nats_cmd() + ["stream", "ls", "--json"], capture_output=True, text=True, timeout=10)
        if r.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")
    verdict = "PASS" if not issues else "FAIL"
    log.info(f"VERIFY: {verdict}{len(issues)} issues")
    for issue in issues:
        log.warning(f"{issue}")
    return {"verdict": verdict, "issues": issues}
def step_report(state: LoopState, verify_result: dict):
    """Step 5: Alert if degraded/emergency.

    No-op while RUNNING, and respects the state's alert cooldown. Alerting
    is best-effort: neither the alert subprocess nor the flag write may fail
    the cycle.
    """
    if state.status == "RUNNING":
        return
    if not state.can_alert():
        log.info("REPORT: Alert cooldown active, skipping")
        return
    severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
    msg = (
        f"Darkplex Loop {severity}\n"
        f"Consecutive failures: {state.consecutive_failures}\n"
        f"Error: {state.error}\n"
        f"Issues: {', '.join(verify_result.get('issues', []))}"
    )
    log.warning(f"REPORT: Sending alert — {state.status}")
    try:
        # FIX: sys.executable instead of "python3" — consistent with
        # step_bridge, and runs the alert under the same interpreter/venv.
        subprocess.run(
            [sys.executable, str(SCRIPT_DIR / "vera-alert.py"), msg],
            capture_output=True, text=True, timeout=15,
        )
    except Exception:
        pass
    # FIX: flag write is best-effort too — previously an unwritable LOG_DIR
    # raised out of step_report and could fail the whole cycle before
    # mark_alerted() ran.
    try:
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        flag.write_text(f"{datetime.now().isoformat()} {state.status}: {state.error}")
    except Exception:
        pass
    state.mark_alerted()
# ── Helpers ──────────────────────────────────────────────────────────────────
def _importance(text: str) -> float:
"""Importance scoring for event text."""
if not text:
return 0.0
score = 0.3
if len(text) > 200: score += 0.1
if len(text) > 500: score += 0.1
caps = len(re.findall(r"\b[A-Z][a-z]+\b", text))
if caps > 3: score += 0.1
if caps > 8: score += 0.1
for p in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
if p.lower() in text.lower():
score -= 0.3
for w in ["meeting", "project", "company", "contract", "decision", "strategy",
"budget", "deadline", "milestone", "partnership", "investment", "revenue",
"client", "proposal", "agreement"]:
if w in text.lower():
score += 0.05
return max(0.0, min(1.0, score))
def print_status():
    """Print current loop state."""
    state = LoopState()
    counts = {}
    for label, path in (("ent", ENTITIES_FILE), ("rel", RELATIONSHIPS_FILE)):
        try:
            counts[label] = len(json.loads(path.read_text()))
        except Exception:
            counts[label] = 0
    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "")
    print(f"{icon} Status: {state.status}")
    print(f"Cycles: {state.cycle_count}")
    print(f"Last cycle: {state.last_cycle or 'never'}")
    print(f"Last success: {state.last_success or 'never'}")
    print(f"Last failure: {state.last_failure or 'never'}")
    print(f"Failures: {state.consecutive_failures}")
    print(f"Entities: {counts['ent']} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)")
    print(f"Relationships:{counts['rel']} total")
    if state.error:
        print(f"Error: {state.error}")
# ── Main Loop ────────────────────────────────────────────────────────────────
def _ms_since(t0: float) -> int:
return int((time.monotonic() - t0) * 1000)
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success.

    Pipeline: ingest → extract → bridge → verify, with per-step timing
    recorded into the state's perf history. A cycle with no new events is a
    successful early exit. A FAIL verdict from verify only fails the cycle
    when the issues indicate real breakage (broken/missing/corrupt).
    """
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()
    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}
        # Early skip if no new events — still counts as a successful cycle.
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True
        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract
        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge
        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Only hard breakage fails the cycle; soft verify issues (e.g. NATS
        # probe hiccups) are reported but don't count as a failure.
        if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]):
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False
        state.record_success(step_results)
        avgs = state.perf_averages()
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status}{perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")
        # A successful cycle clears any standing alert flag.
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()
        return True
    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # The first step missing from step_results is the one that raised.
        step_name = "unknown"
        for name in ["ingest", "extract", "bridge", "verify"]:
            if name not in step_results:
                step_name = name
                break
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
def main():
    """CLI entry point for `darkplex loop`.

    Flags: --status (print state and exit), --check (exit 0/1/2 for
    new/none/error), --once (single cycle), --cycle N (seconds between
    cycles).
    """
    # FIX: create the log directory BEFORE logging.basicConfig —
    # logging.FileHandler opens its file at construction and raises
    # FileNotFoundError if LOG_DIR does not exist yet (first run).
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )
    args = sys.argv[1:]
    if "--status" in args:
        print_status()
        return
    if "--check" in args:
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)
    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS
    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])
    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")
    running = True

    def handle_signal(sig, frame):
        # Graceful shutdown: finish the current cycle, then stop.
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    while running:
        run_cycle(state)
        if once:
            break
        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1s slices so shutdown signals are honored promptly.
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)
    log.info("Darkplex Loop stopped")

View file

@ -3,9 +3,9 @@ requires = ["setuptools>=68.0", "wheel"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project] [project]
name = "cortex" name = "darkplex-core"
version = "0.1.0" version = "0.2.0"
description = "Intelligence layer for OpenClaw — triage, health, feedback, memory hygiene, roadmap, validation" description = "Darkplex Intelligence Layer — triage, health, feedback, governance, knowledge extraction, memory hygiene, roadmap, validation"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
license = {text = "MIT"} license = {text = "MIT"}
@ -15,6 +15,7 @@ authors = [
[project.scripts] [project.scripts]
cortex = "cortex.cli:main" cortex = "cortex.cli:main"
darkplex = "cortex.cli:main"
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
include = ["cortex*"] include = ["cortex*"]

106
tests/test_anticipator.py Normal file
View file

@ -0,0 +1,106 @@
"""Tests for intelligence/anticipator module."""
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from cortex.intelligence.anticipator import (
AlertSeverity,
Anticipator,
PatternDefinition,
Prediction,
_detect_recurring_errors,
_detect_ssl_expiry,
_detect_usage_spike,
)
class TestAnticipatorInit:
    """Anticipator construction: built-in patterns plus custom registration."""

    def test_creates_with_builtin_patterns(self):
        # A fresh Anticipator ships with 3 built-in patterns.
        a = Anticipator()
        assert len(a.patterns) == 3

    def test_register_custom_pattern(self):
        a = Anticipator()
        p = PatternDefinition(name="test", description="test", detector=lambda e: None)
        a.register_pattern(p)
        assert len(a.patterns) == 4
class TestAnalyze:
    """Anticipator.analyze: empty input, no matches, and detector errors."""

    def test_empty_events(self):
        a = Anticipator()
        result = a.analyze([])
        assert result == []

    def test_no_matching_patterns(self):
        a = Anticipator()
        result = a.analyze([{"type": "unrelated", "data": {}}])
        assert result == []

    def test_detector_exception_handled(self):
        # A raising detector must be swallowed, not propagate to the caller.
        def bad_detector(events):
            raise RuntimeError("boom")
        a = Anticipator()
        a.patterns = [PatternDefinition(name="bad", description="", detector=bad_detector)]
        result = a.analyze([{}])
        assert result == []
class TestSSLExpiry:
    """_detect_ssl_expiry: severity thresholds by days-to-expiry."""

    def test_no_ssl_events(self):
        assert _detect_ssl_expiry([{"type": "other"}]) is None

    def test_expiring_soon(self):
        # ~5 days out → WARNING.
        expiry = (datetime.now(timezone.utc) + timedelta(days=5)).isoformat()
        events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
        result = _detect_ssl_expiry(events)
        assert result is not None
        assert result.severity == AlertSeverity.WARNING

    def test_expiring_critical(self):
        # ~1 day out → CRITICAL.
        expiry = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat()
        events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
        result = _detect_ssl_expiry(events)
        assert result.severity == AlertSeverity.CRITICAL

    def test_not_expiring(self):
        # 60 days out is not worth a prediction.
        expiry = (datetime.now(timezone.utc) + timedelta(days=60)).isoformat()
        events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
        assert _detect_ssl_expiry(events) is None
class TestRecurringErrors:
    """_detect_recurring_errors: fires only above a repetition threshold."""

    def test_no_errors(self):
        assert _detect_recurring_errors([]) is None

    def test_few_errors(self):
        # 2 repeats of the same error_type is below the threshold.
        events = [{"type": "error", "data": {"error_type": "timeout"}}] * 2
        assert _detect_recurring_errors(events) is None

    def test_recurring_detected(self):
        events = [{"type": "error", "data": {"error_type": "timeout"}}] * 5
        result = _detect_recurring_errors(events)
        assert result is not None
        assert result.metadata["count"] == 5
class TestUsageSpike:
    """_detect_usage_spike: needs enough samples and a clear outlier tail."""

    def test_insufficient_data(self):
        assert _detect_usage_spike([]) is None

    def test_normal_usage(self):
        events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(15)]
        assert _detect_usage_spike(events) is None

    def test_spike_detected(self):
        # Flat baseline of 10 with the last three samples at 100.
        events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(12)]
        events[-1]["data"]["value"] = 100
        events[-2]["data"]["value"] = 100
        events[-3]["data"]["value"] = 100
        result = _detect_usage_spike(events)
        assert result is not None

112
tests/test_collective.py Normal file
View file

@ -0,0 +1,112 @@
"""Tests for intelligence/collective module."""
import asyncio
import sys
from pathlib import Path
from unittest import mock
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from cortex.intelligence.shared_memory import Insight, SharedMemory, ALLOWED_AGENTS
from cortex.intelligence.collective import AggregatedPattern, CollectiveLearning
class TestCollectiveLearningInit:
    """CollectiveLearning starts with no patterns and no insights."""

    def test_init(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        assert cl._patterns == []
        assert len(cl._insights_by_topic) == 0
class TestPatternDetection:
    """_detect_patterns requires corroboration from more than one agent."""

    def test_no_patterns_with_single_agent(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        # Add insights from same agent
        agent = list(ALLOWED_AGENTS)[0]
        for i in range(5):
            cl._insights_by_topic["infra"].append(
                Insight(agent=agent, topic="infra", content=f"test {i}")
            )
        cl._detect_patterns()
        assert len(cl._patterns) == 0

    def test_pattern_with_multiple_agents(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agents = list(ALLOWED_AGENTS)[:2]
        cl._insights_by_topic["infra"].append(
            Insight(agent=agents[0], topic="infra", content="observation 1")
        )
        cl._insights_by_topic["infra"].append(
            Insight(agent=agents[1], topic="infra", content="observation 2")
        )
        cl._detect_patterns()
        assert len(cl._patterns) == 1
        assert cl._patterns[0].topic == "infra"
class TestGetPatterns:
    """get_patterns filtering by topic and by minimum confidence."""

    def test_filter_by_topic(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agents = list(ALLOWED_AGENTS)[:2]
        for topic in ["infra", "security"]:
            for agent in agents:
                cl._insights_by_topic[topic].append(
                    Insight(agent=agent, topic=topic, content="test")
                )
        cl._detect_patterns()
        assert len(cl.get_patterns(topic="infra")) == 1

    def test_filter_by_confidence(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agents = list(ALLOWED_AGENTS)[:2]
        # Two low-confidence insights form a pattern below the 0.5 cutoff.
        cl._insights_by_topic["low"].append(
            Insight(agent=agents[0], topic="low", content="x", confidence=0.1)
        )
        cl._insights_by_topic["low"].append(
            Insight(agent=agents[1], topic="low", content="y", confidence=0.1)
        )
        cl._detect_patterns()
        assert len(cl.get_patterns(min_confidence=0.5)) == 0
class TestTopicSummary:
    """get_topic_summary on a fresh instance is empty."""

    def test_empty(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        assert cl.get_topic_summary() == {}
class TestExportKnowledge:
    """export_knowledge emits JSON with the expected top-level keys."""

    def test_export_json(self):
        import json
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        data = json.loads(cl.export_knowledge())
        assert "patterns" in data
        assert "topics" in data
        assert "allowed_agents" in data
class TestHandleInsight:
    """_handle_insight enforces the ALLOWED_AGENTS allowlist."""

    @pytest.mark.asyncio
    async def test_rejects_non_allowed_agent(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        insight = Insight(agent="unauthorized_agent", topic="test", content="bad")
        await cl._handle_insight(insight)
        # Rejected insights must leave no trace in the topic index.
        assert len(cl._insights_by_topic) == 0

    @pytest.mark.asyncio
    async def test_accepts_allowed_agent(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agent = list(ALLOWED_AGENTS)[0]
        insight = Insight(agent=agent, topic="test", content="good")
        await cl._handle_insight(insight)
        assert len(cl._insights_by_topic["test"]) == 1

View file

@ -0,0 +1,111 @@
"""Tests for entity_manager module."""
import json
import sys
import tempfile
from pathlib import Path
from unittest import mock
import pytest
# Add parent to path
sys.path.insert(0, str(Path(__file__).parent.parent))
import cortex.entity_manager as em
class TestNormalize:
    """normalize: lowercase, underscores→hyphens, trimmed whitespace."""

    def test_basic(self):
        assert em.normalize("Hello World") == "hello world"

    def test_underscores(self):
        assert em.normalize("my_entity") == "my-entity"

    def test_whitespace(self):
        assert em.normalize("  test  ") == "test"
class TestLoadJson:
    """load_json: missing or invalid files degrade to an empty dict."""

    def test_missing_file(self):
        assert em.load_json(Path("/nonexistent/file.json")) == {}

    def test_valid_json(self, tmp_path):
        f = tmp_path / "test.json"
        f.write_text('{"key": "value"}')
        assert em.load_json(f) == {"key": "value"}

    def test_invalid_json(self, tmp_path):
        f = tmp_path / "bad.json"
        f.write_text("not json")
        assert em.load_json(f) == {}
class TestSaveJson:
    """save_json creates missing parent directories before writing."""

    def test_creates_dirs(self, tmp_path):
        f = tmp_path / "sub" / "dir" / "test.json"
        em.save_json(f, {"hello": "world"})
        assert json.loads(f.read_text()) == {"hello": "world"}
class TestExtractEntities:
    """extract_entities: known names, @mentions, capitalized phrases, acronyms,
    stop-word and short-mention filtering."""

    def test_known_entity(self):
        known = {"acme-corp": {"type": "company"}}
        result = em.extract_entities("Working with Acme Corp today", known)
        assert "acme-corp" in result

    def test_mention(self):
        result = em.extract_entities("Talked to @johndoe about it", {})
        assert "johndoe" in result
        assert result["johndoe"]["type"] == "person"

    def test_capitalized_multi_word(self):
        result = em.extract_entities("Met with John Smith yesterday", {})
        assert "john smith" in result

    def test_acronym(self):
        result = em.extract_entities("The ACME project is going well", {})
        assert "acme" in result
        assert result["acme"]["type"] == "organization"

    def test_stop_words_filtered(self):
        result = em.extract_entities("The system is working fine", {})
        # None of these should be extracted as entities
        for word in ["the", "system", "working"]:
            assert word not in result

    def test_empty_text(self):
        result = em.extract_entities("", {})
        assert result == {}

    def test_short_mention_filtered(self):
        """Mentions shorter than 3 chars should be filtered."""
        result = em.extract_entities("@ab said hi", {})
        assert "ab" not in result
class TestCmdBootstrap:
    """cmd_bootstrap writes an entities file even when life/areas is empty."""

    def test_bootstrap_with_empty_areas(self, tmp_path):
        # Patch all file locations into tmp_path so no real state is touched.
        with mock.patch.object(em, "LIFE_AREAS", tmp_path):
            with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
                with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"):
                    em.cmd_bootstrap()
                    assert (tmp_path / "entities.json").exists()
class TestCmdRelate:
    """cmd_relate creates a relationship and increments count on repeats."""

    def test_create_relationship(self, tmp_path):
        with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"):
            with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
                em.cmd_relate("Alice", "Bob", "colleague")
                rels = json.loads((tmp_path / "rels.json").read_text())
                assert len(rels) == 1
                key = list(rels.keys())[0]
                assert "colleague" in rels[key]["types"]

    def test_update_relationship(self, tmp_path):
        with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"):
            with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
                em.cmd_relate("Alice", "Bob", "colleague")
                em.cmd_relate("Alice", "Bob", "friend")
                rels = json.loads((tmp_path / "rels.json").read_text())
                key = list(rels.keys())[0]
                # Second relate on the same pair bumps the count.
                assert rels[key]["count"] == 2

View file

@ -0,0 +1,79 @@
"""Tests for governance/enforcer.py — Runtime Enforcer."""
import sys
from pathlib import Path
import yaml
import pytest
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.enforcer import Enforcer, Decision
from governance.policy import PolicyEngine
from governance.risk_scorer import RiskScorer
from governance.evidence import EvidenceCollector, ControlMapping
def _make_enforcer(tmp_path, rules=None):
    """Build an Enforcer backed by tmp_path; optionally seed one policy file."""
    if rules:
        (tmp_path / "test.yaml").write_text(yaml.dump({
            "name": "test", "description": "", "version": "1",
            "rules": rules,
        }))
    return Enforcer(
        policy_engine=PolicyEngine(policies_dir=str(tmp_path)),
        risk_scorer=RiskScorer(),
        evidence_collector=EvidenceCollector(control_mapping=ControlMapping("/dev/null")),
    )
class TestDecision:
    """Decision.approved reflects the verdict string."""

    def test_approved(self):
        from governance.risk_scorer import RiskResult
        d = Decision(verdict="approve", reason="ok", risk=RiskResult(0, "low", {}), policy_result={})
        assert d.approved

    def test_not_approved(self):
        from governance.risk_scorer import RiskResult
        d = Decision(verdict="deny", reason="no", risk=RiskResult(9, "critical", {}), policy_result={})
        assert not d.approved
class TestEnforcer:
    """Enforcer.evaluate: default allow, policy deny, risk override,
    evidence recording, and the data_classification alias."""

    def test_default_allow(self, tmp_path):
        enforcer = _make_enforcer(tmp_path)
        decision = enforcer.evaluate({"agent": "claudia", "action": "read", "hour": 12})
        assert decision.verdict == "allow"

    def test_policy_deny(self, tmp_path):
        enforcer = _make_enforcer(tmp_path, rules=[
            {"name": "deny-ext", "conditions": {"target": "external"}, "effect": "deny", "priority": 10},
        ])
        decision = enforcer.evaluate({"agent": "claudia", "action": "send", "target": "external", "hour": 12})
        assert decision.verdict == "deny"

    def test_risk_override(self, tmp_path):
        """High risk should override an allow policy to escalate."""
        enforcer = _make_enforcer(tmp_path, rules=[
            {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1},
        ])
        decision = enforcer.evaluate({
            "agent": "claudia", "action": "export",
            "data_type": "restricted", "target": "external", "hour": 12,
        })
        # Risk should be high/critical, overriding the allow
        assert decision.verdict in ("deny", "escalate")

    def test_evidence_recorded(self, tmp_path):
        # Every evaluate() call appends exactly one evidence record.
        enforcer = _make_enforcer(tmp_path)
        enforcer.evaluate({"agent": "test", "action": "read", "hour": 12})
        assert len(enforcer.evidence_collector.evidence) == 1

    def test_data_classification_alias(self, tmp_path):
        enforcer = _make_enforcer(tmp_path)
        decision = enforcer.evaluate({
            "agent": "test", "action": "read",
            "data_classification": "confidential", "hour": 12,
        })
        # Should use data_classification as data_type
        assert decision.risk.factors["data_type"]["value"] == "confidential"

View file

@ -0,0 +1,86 @@
"""Tests for governance/evidence.py — Evidence Collector & Control Mapping."""
import json
import sys
from pathlib import Path
import yaml
import pytest
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.evidence import EvidenceCollector, EvidenceRecord, ControlMapping
class TestControlMapping:
    """ControlMapping: missing file degrades to no controls; YAML mapping
    resolves event types to control IDs."""

    def test_missing_file(self):
        m = ControlMapping(mapping_path="/nonexistent/path.yaml")
        assert m.get_controls("anything") == []

    def test_load_mapping(self, tmp_path):
        mapping_file = tmp_path / "mapping.yaml"
        mapping_file.write_text(yaml.dump({
            "mappings": [
                {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]},
                {"event_types": ["access_request", "data_export"], "controls": ["A.9.1"]},
            ]
        }))
        m = ControlMapping(mapping_path=str(mapping_file))
        assert "A.5.1" in m.get_controls("policy_evaluation")
        assert "A.9.1" in m.get_controls("access_request")
        assert m.get_controls("unknown_event") == []
class TestEvidenceRecord:
    """EvidenceRecord.to_dict round-trips the record fields."""

    def test_to_dict(self):
        r = EvidenceRecord(
            timestamp="2026-01-01T00:00:00Z",
            event_type="test",
            agent="claudia",
            action="read",
            verdict="allow",
            risk_score=2,
            risk_level="low",
            controls=["A.5.1"],
        )
        d = r.to_dict()
        assert d["agent"] == "claudia"
        assert d["controls"] == ["A.5.1"]
class TestEvidenceCollector:
    """EvidenceCollector: record, filter by agent/verdict, JSON export."""

    def setup_method(self):
        # /dev/null mapping → collector runs with no control mapping loaded.
        self.collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))

    def test_record(self):
        rec = self.collector.record(
            event_type="policy_evaluation",
            agent="claudia",
            action="send_email",
            verdict="allow",
            risk_score=3,
            risk_level="low",
        )
        assert rec.agent == "claudia"
        assert len(self.collector.evidence) == 1

    def test_filter_by_agent(self):
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        self.collector.record(event_type="e", agent="b", action="x", verdict="deny")
        assert len(self.collector.get_evidence(agent="a")) == 1

    def test_filter_by_verdict(self):
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        self.collector.record(event_type="e", agent="a", action="y", verdict="deny")
        assert len(self.collector.get_evidence(verdict="deny")) == 1

    def test_export_json(self):
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        exported = self.collector.export_json()
        data = json.loads(exported)
        assert len(data) == 1
        assert data[0]["agent"] == "a"

    def test_empty_evidence(self):
        assert self.collector.get_evidence() == []
        assert json.loads(self.collector.export_json()) == []

View file

@ -0,0 +1,126 @@
"""Tests for governance/policy.py — Policy Engine.
NOTE: This module exists only in darkplex-core. Tests written against the module API.
"""
import os
import tempfile
import pytest
from pathlib import Path
# We need yaml for creating test fixtures
import yaml
def _write_policy(tmpdir, filename, data):
    """Serialize *data* as YAML to tmpdir/filename and return the path."""
    target = Path(tmpdir) / filename
    target.write_text(yaml.dump(data))
    return target
class TestRule:
    """Rule.matches: equality, list membership, missing keys, AND semantics."""

    def setup_method(self):
        # Import at setup time so the darkplex-core path is on sys.path first.
        import sys
        sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
        from governance.policy import Rule
        self.Rule = Rule

    def test_matches_simple(self):
        r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert r.matches({"agent": "claudia"})

    def test_no_match(self):
        r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert not r.matches({"agent": "other"})

    def test_missing_key(self):
        # A condition key absent from the context is a non-match.
        r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert not r.matches({})

    def test_list_condition(self):
        # A list condition matches any of its members.
        r = self.Rule(name="r1", conditions={"action": ["read", "write"]}, effect="allow")
        assert r.matches({"action": "read"})
        assert not r.matches({"action": "delete"})

    def test_multiple_conditions(self):
        # All conditions must hold (AND).
        r = self.Rule(name="r1", conditions={"agent": "claudia", "action": "send"}, effect="deny")
        assert r.matches({"agent": "claudia", "action": "send"})
        assert not r.matches({"agent": "claudia", "action": "read"})
class TestPolicyEngine:
    """PolicyEngine: loading, default-allow evaluation, priority ordering,
    reload, and skipping schema.yaml."""

    def setup_method(self):
        import sys
        sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
        from governance.policy import PolicyEngine
        self.PolicyEngine = PolicyEngine

    def test_empty_dir(self, tmp_path):
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert engine.policies == []

    def test_nonexistent_dir(self, tmp_path):
        engine = self.PolicyEngine(policies_dir=str(tmp_path / "nope"))
        assert engine.policies == []

    def test_load_policy(self, tmp_path):
        _write_policy(tmp_path, "test.yaml", {
            "name": "test-policy",
            "description": "Test",
            "version": "1.0.0",
            "rules": [
                {"name": "deny-external", "conditions": {"target": "external"}, "effect": "deny", "priority": 10},
            ],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert len(engine.policies) == 1
        assert engine.policies[0].name == "test-policy"
        assert len(engine.policies[0].rules) == 1

    def test_evaluate_no_match(self, tmp_path):
        # No matching rule → default allow.
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [{"name": "r1", "conditions": {"agent": "x"}, "effect": "deny"}],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        result = engine.evaluate({"agent": "y"})
        assert result["verdict"] == "allow"

    def test_evaluate_match_deny(self, tmp_path):
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [{"name": "r1", "conditions": {"target": "external"}, "effect": "deny", "priority": 5}],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        result = engine.evaluate({"target": "external"})
        assert result["verdict"] == "deny"

    def test_priority_ordering(self, tmp_path):
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [
                {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1},
                {"name": "deny-ext", "conditions": {"agent": "claudia"}, "effect": "deny", "priority": 10},
            ],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        result = engine.evaluate({"agent": "claudia"})
        assert result["verdict"] == "deny"  # higher priority wins

    def test_reload(self, tmp_path):
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert len(engine.policies) == 0
        _write_policy(tmp_path, "new.yaml", {
            "name": "new", "description": "", "version": "1", "rules": [],
        })
        engine.reload()
        assert len(engine.policies) == 1

    def test_skips_schema_yaml(self, tmp_path):
        # schema.yaml is reserved for the policy schema and must not load.
        _write_policy(tmp_path, "schema.yaml", {"name": "schema"})
        _write_policy(tmp_path, "real.yaml", {
            "name": "real", "description": "", "version": "1", "rules": [],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert len(engine.policies) == 1
        assert engine.policies[0].name == "real"

View file

@ -0,0 +1,57 @@
"""Tests for governance/report_generator.py."""
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.evidence import EvidenceCollector, ControlMapping
from governance.report_generator import ReportGenerator
class TestReportGenerator:
    """Tests for ReportGenerator compliance/agent reports and JSON export."""

    def _collector_with_mapping(self, tmp_path):
        # Helper: collector whose control mapping routes policy_evaluation
        # events onto the controls A.5.1 and A.8.1.
        import yaml
        mapping_file = tmp_path / "mapping.yaml"
        mapping_file.write_text(yaml.dump({
            "mappings": [
                {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]},
            ]
        }))
        return EvidenceCollector(control_mapping=ControlMapping(str(mapping_file)))

    def test_empty_report(self):
        # No recorded evidence -> report carries status "no_evidence".
        collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))
        gen = ReportGenerator(collector)
        report = gen.generate_compliance_report()
        assert report["status"] == "no_evidence"

    def test_report_with_evidence(self, tmp_path):
        collector = self._collector_with_mapping(tmp_path)
        collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow", risk_score=2, risk_level="low")
        collector.record(event_type="policy_evaluation", agent="claudia", action="write", verdict="deny", risk_score=8, risk_level="high")
        gen = ReportGenerator(collector)
        report = gen.generate_compliance_report()
        assert report["total_evidence"] == 2
        assert "A.5.1" in report["controls_covered"]
        assert report["summary"]["total_deny"] == 1
        assert report["summary"]["high_risk_events"] == 1

    def test_agent_report(self, tmp_path):
        # The per-agent report only counts actions of the requested agent.
        collector = self._collector_with_mapping(tmp_path)
        collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow")
        collector.record(event_type="policy_evaluation", agent="other", action="read", verdict="deny")
        gen = ReportGenerator(collector)
        report = gen.generate_agent_report("claudia")
        assert report["agent"] == "claudia"
        assert report["total_actions"] == 1

    def test_export_json(self):
        # export_json must emit parseable JSON even with no evidence.
        collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))
        gen = ReportGenerator(collector)
        output = gen.export_json()
        data = json.loads(output)
        assert "status" in data  # empty report

View file

@ -0,0 +1,80 @@
"""Tests for governance/risk_scorer.py."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.risk_scorer import RiskScorer, RiskResult, _classify_level
class TestClassifyLevel:
    """Spot-check the score-to-level bands at their observed edges."""

    def test_low(self):
        for score in (0, 3):
            assert _classify_level(score) == "low"

    def test_elevated(self):
        for score in (4, 6):
            assert _classify_level(score) == "elevated"

    def test_high(self):
        for score in (7, 8):
            assert _classify_level(score) == "high"

    def test_critical(self):
        for score in (9, 10):
            assert _classify_level(score) == "critical"
class TestRiskResult:
    """is_acceptable should track whether the risk value is low enough."""

    def test_acceptable(self):
        low_risk = RiskResult(value=3, level="low", factors={})
        assert low_risk.is_acceptable

    def test_not_acceptable(self):
        high_risk = RiskResult(value=7, level="high", factors={})
        assert not high_risk.is_acceptable
class TestRiskScorer:
    """Factor-weighting behaviour of RiskScorer.score()."""

    def setup_method(self):
        self.scorer = RiskScorer()

    def test_default_low_risk(self):
        # Daytime activity with no risky factors scores "low".
        result = self.scorer.score({"hour": 12})
        assert result.level == "low"
        assert result.is_acceptable

    def test_public_internal(self):
        result = self.scorer.score({"data_type": "public", "target": "internal", "hour": 12})
        assert result.value <= 3

    def test_confidential_external(self):
        # Confidential data going to an external target is high risk.
        result = self.scorer.score({"data_type": "confidential", "target": "external", "hour": 12})
        assert result.value >= 7

    def test_restricted_critical(self):
        result = self.scorer.score({"data_type": "restricted", "target": "external", "hour": 12})
        assert result.level in ("high", "critical")

    def test_off_hours_bonus(self):
        # The same action at 02:00 must score higher than at noon.
        day = self.scorer.score({"data_type": "internal", "hour": 12})
        night = self.scorer.score({"data_type": "internal", "hour": 2})
        assert night.value > day.value

    def test_admin_role_reduces_risk(self):
        admin = self.scorer.score({"agent_role": "admin", "hour": 12})
        external = self.scorer.score({"agent_role": "external", "hour": 12})
        assert admin.value < external.value

    def test_factors_populated(self):
        # Every weighted factor appears in the result breakdown.
        result = self.scorer.score({"data_type": "internal", "target": "external", "hour": 10})
        assert "data_type" in result.factors
        assert "target" in result.factors
        assert "agent_role" in result.factors
        assert "time_of_day" in result.factors

    def test_clamped_0_10(self):
        # Even with extreme values, should be 0-10
        result = self.scorer.score({"data_type": "restricted", "target": "external", "agent_role": "external", "hour": 3})
        assert 0 <= result.value <= 10

View file

@ -0,0 +1,136 @@
"""Tests for intelligence/knowledge_cleanup.py — Knowledge Graph Cleanup."""
import json
import math
import sys
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
import knowledge_cleanup as kc
class TestBackup:
    """kc.backup() must produce an on-disk copy with a backup_ name marker."""

    def test_creates_backup(self, tmp_path):
        source = tmp_path / "test.json"
        source.write_text('{"a": 1}')
        result = kc.backup(source)
        # The returned path must exist and be identifiable as a backup.
        assert result.exists()
        assert "backup_" in result.name
class TestAtomicWrite:
    """kc.atomic_write() must persist JSON that round-trips unchanged."""

    def test_writes_atomically(self, tmp_path):
        target = tmp_path / "out.json"
        kc.atomic_write(target, {"key": "value"})
        on_disk = json.loads(target.read_text())
        assert on_disk == {"key": "value"}
class TestFindDuplicates:
    """Duplicate-entity detection (case-insensitive and substring matches)."""

    def test_no_duplicates(self):
        entities = {"albert": {}, "mondo-gate": {}}
        groups = kc.find_duplicates(entities)
        assert len(groups) == 0

    def test_case_duplicates(self):
        # Names differing only in case collapse into one duplicate group.
        entities = {"Albert": {}, "albert": {}, "ALBERT": {}}
        groups = kc.find_duplicates(entities)
        assert len(groups) >= 1

    def test_substring_duplicates(self):
        # "mondo" vs "mondo gate" with the same type should be grouped.
        entities = {"mondo": {"type": "company"}, "mondo gate": {"type": "company"}}
        groups = kc.find_duplicates(entities)
        assert len(groups) >= 1
class TestPickCanonical:
    """Canonical-name selection among duplicate entity records."""

    def test_prefers_uppercase(self):
        # Capitalised spelling wins over lowercase.
        names = ["albert", "Albert"]
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}}
        assert kc.pick_canonical(names, entities) == "Albert"

    def test_prefers_more_fields(self):
        # The record carrying more metadata fields wins.
        names = ["a", "A"]
        entities = {"a": {"type": "person"}, "A": {"type": "person", "source": "x", "extra": "y"}}
        assert kc.pick_canonical(names, entities) == "A"
class TestDeduplicate:
    """End-to-end dedupe: merging, dry-run safety, relationship remapping."""

    def test_merges_entities(self):
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}}
        rels = {}
        e, r = kc.deduplicate(entities, rels, dry_run=False)
        assert len(e) == 1

    def test_dry_run_no_change(self):
        # dry_run=True must report but never mutate the entity set.
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}}
        rels = {}
        e, r = kc.deduplicate(entities, rels, dry_run=True)
        assert len(e) == 2  # unchanged in dry run

    def test_updates_relationships(self):
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}}
        rels = {
            "albert::mondo": {"a": "albert", "b": "mondo", "types": ["co-occurrence"], "count": 1, "first_seen": "2026-01-01", "last_seen": "2026-01-01"},
        }
        e, r = kc.deduplicate(entities, rels, dry_run=False)
        # Relationship should be remapped to canonical
        assert len(r) == 1
class TestScoreRelationships:
    """Strength scoring and pruning of relationship edges."""

    def test_scores_assigned(self):
        rels = {
            "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat(), "first_seen": "2026-01-01"},
        }
        result = kc.score_relationships(rels, dry_run=False)
        # A fresh, frequently-seen edge gets a strength in (0, 1].
        assert "strength" in result["a::b"]
        assert 0 < result["a::b"]["strength"] <= 1

    def test_removes_weak(self):
        old_date = (datetime.now() - timedelta(days=300)).isoformat()
        rels = {
            "a::b": {"count": 1, "types": ["co-occurrence"], "last_seen": old_date, "first_seen": old_date},
        }
        result = kc.score_relationships(rels, dry_run=False)
        # Very old + low count should have low strength
        if len(result) > 0:
            assert result["a::b"]["strength"] < 0.3

    def test_dry_run(self):
        rels = {
            "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat()},
        }
        result = kc.score_relationships(rels, dry_run=True)
        # Dry run must not annotate the input with strengths.
        assert "strength" not in result["a::b"]
class TestClassifyUnknowns:
    """LLM-backed reclassification of entities typed "unknown"."""

    @patch("knowledge_cleanup.ollama_generate")
    def test_no_unknowns(self, mock_ollama):
        # Nothing to classify -> the LLM must not be called at all.
        entities = {"albert": {"type": "person"}}
        result = kc.classify_unknowns(entities, dry_run=False)
        mock_ollama.assert_not_called()
        assert result == entities

    @patch("knowledge_cleanup.ollama_generate")
    def test_classifies_unknowns(self, mock_ollama):
        # The LLM answers with an index->type mapping that is applied.
        mock_ollama.return_value = '{"1": "person"}'
        entities = {"albert": {"type": "unknown"}}
        result = kc.classify_unknowns(entities, dry_run=False)
        assert result["albert"]["type"] == "person"

    @patch("knowledge_cleanup.ollama_generate")
    def test_dry_run_no_change(self, mock_ollama):
        mock_ollama.return_value = '{"1": "person"}'
        entities = {"albert": {"type": "unknown"}}
        result = kc.classify_unknowns(entities, dry_run=True)
        assert result["albert"]["type"] == "unknown"

    @patch("knowledge_cleanup.ollama_generate")
    def test_handles_llm_failure(self, mock_ollama):
        # LLM errors must degrade gracefully, leaving types untouched.
        mock_ollama.side_effect = Exception("timeout")
        entities = {"albert": {"type": "unknown"}}
        result = kc.classify_unknowns(entities, dry_run=False)
        assert result["albert"]["type"] == "unknown"  # unchanged

View file

@ -0,0 +1,61 @@
"""Tests for knowledge_extractor.py (darkplex-core root) — Smart Extractor."""
import sys
from pathlib import Path
from unittest.mock import patch
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from knowledge_extractor import importance_heuristic, parse_since
class TestImportanceHeuristic:
    """Heuristic importance scoring of raw text snippets."""

    def test_empty(self):
        # Empty and None inputs are scored 0.0 rather than raising.
        assert importance_heuristic("") == 0.0
        assert importance_heuristic(None) == 0.0

    def test_short_text(self):
        assert 0 < importance_heuristic("Hello world") <= 1.0

    def test_long_text_boosted(self):
        # Longer text should outscore a trivially short one.
        short_score = importance_heuristic("Hello")
        long_score = importance_heuristic("x " * 300)
        assert long_score > short_score

    def test_heartbeat_penalized(self):
        heartbeat = "HEARTBEAT_OK system running fine no issues detected at all"
        assert importance_heuristic(heartbeat) < 0.3

    def test_business_boosted(self):
        business = "Meeting about the project deadline and contract with the client partnership"
        assert importance_heuristic(business) > 0.4

    def test_capitalized_names_boost(self):
        text = "Albert discussed with Thomas, Sarah, Michael, Peter, Franz, and Maria about the Company"
        assert importance_heuristic(text) > 0.4

    def test_clamped(self):
        # Even extreme texts should be 0-1
        clamped = importance_heuristic("cron: heartbeat HEARTBEAT_OK health check no critical")
        assert 0 <= clamped <= 1.0
class TestParseSince:
    """parse_since accepts <N><h|d|m> strings and rejects anything else."""

    def test_hours(self):
        result = parse_since("6h")
        assert result is not None
        assert result > 0

    def test_days(self):
        assert parse_since("1d") is not None

    def test_minutes(self):
        assert parse_since("30m") is not None

    def test_invalid(self):
        for bad in ("abc", ""):
            assert parse_since(bad) is None

147
tests/test_llm_extractor.py Normal file
View file

@ -0,0 +1,147 @@
"""Tests for intelligence/llm_extractor.py — LLM-Powered Entity Extractor."""
import json
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
from llm_extractor import (
_parse_json_response,
_normalize_entities,
extract_entities_llm,
extract_entities_llm_batch,
is_available,
VALID_TYPES,
)
class TestParseJsonResponse:
    """_parse_json_response must tolerate empty, fenced, and junk input."""

    def test_empty(self):
        for blank in ("", None):
            assert _parse_json_response(blank) == {}

    def test_plain_json(self):
        parsed = _parse_json_response('{"albert": {"type": "person", "context": "CEO"}}')
        assert "albert" in parsed

    def test_markdown_fenced(self):
        fenced = '```json\n{"albert": {"type": "person", "context": "CEO"}}\n```'
        assert "albert" in _parse_json_response(fenced)

    def test_no_json(self):
        assert _parse_json_response("no json here") == {}

    def test_nested_braces(self):
        parsed = _parse_json_response('{"a": {"type": "person", "context": "test"}}')
        assert "a" in parsed
class TestNormalizeEntities:
    """Normalisation of raw LLM output into canonical entity records."""

    def test_valid_entity(self):
        # Names are lower-cased and tagged with match="llm".
        raw = {"Albert": {"type": "person", "context": "CEO of company"}}
        result = _normalize_entities(raw)
        assert "albert" in result
        assert result["albert"]["type"] == "person"
        assert result["albert"]["match"] == "llm"

    def test_type_alias(self):
        # "language" is aliased onto the canonical "technology" type.
        raw = {"python": {"type": "language", "context": "programming"}}
        result = _normalize_entities(raw)
        assert result["python"]["type"] == "technology"

    def test_unknown_type_becomes_concept(self):
        # Unrecognised types fall back to "concept".
        raw = {"thing": {"type": "xyzzy", "context": "unknown"}}
        result = _normalize_entities(raw)
        assert result["thing"]["type"] == "concept"

    def test_filters_short_names(self):
        # Single-character names are dropped.
        raw = {"x": {"type": "person", "context": "test"}}
        result = _normalize_entities(raw)
        assert len(result) == 0

    def test_filters_long_names(self):
        # Names longer than 80 characters are dropped.
        raw = {"a" * 81: {"type": "person", "context": "test"}}
        result = _normalize_entities(raw)
        assert len(result) == 0

    def test_non_dict_info_skipped(self):
        raw = {"test": "not a dict"}
        result = _normalize_entities(raw)
        assert len(result) == 0

    def test_context_truncated(self):
        # Context strings are capped at 100 characters.
        raw = {"test": {"type": "person", "context": "x" * 200}}
        result = _normalize_entities(raw)
        assert len(result["test"]["context"]) <= 100

    def test_underscores_to_hyphens(self):
        raw = {"mondo_gate": {"type": "company", "context": "test"}}
        result = _normalize_entities(raw)
        assert "mondo-gate" in result
class TestExtractEntitiesLlm:
    """Single-text extraction: short-circuiting, fallback, truncation."""

    @patch("llm_extractor._call_ollama")
    def test_empty_text(self, mock_ollama):
        # Empty/too-short input never reaches the LLM.
        assert extract_entities_llm("") == {}
        assert extract_entities_llm("short") == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_ollama_unavailable(self, mock_ollama):
        # A None response from ollama makes the extractor return None.
        mock_ollama.return_value = None
        result = extract_entities_llm("This is a test about Albert and Mondo Gate AG")
        assert result is None  # signals fallback

    @patch("llm_extractor._call_ollama")
    def test_successful_extraction(self, mock_ollama):
        mock_ollama.return_value = '{"albert": {"type": "person", "context": "mentioned"}}'
        result = extract_entities_llm("Albert discussed the project with the team members today")
        assert "albert" in result
        assert result["albert"]["type"] == "person"

    @patch("llm_extractor._call_ollama")
    def test_truncates_long_text(self, mock_ollama):
        mock_ollama.return_value = "{}"
        extract_entities_llm("x" * 3000)
        # First positional arg of the ollama call is the full prompt.
        call_args = mock_ollama.call_args[0][0]
        # The text in the prompt should be truncated
        assert len(call_args) < 3000 + 500  # prompt overhead
class TestExtractEntitiesLlmBatch:
    """Batch extraction: filtering of trivial inputs, combined results."""

    @patch("llm_extractor._call_ollama")
    def test_empty_list(self, mock_ollama):
        assert extract_entities_llm_batch([]) == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_filters_short_texts(self, mock_ollama):
        # All-trivial inputs are filtered out before any LLM call.
        mock_ollama.return_value = "{}"
        result = extract_entities_llm_batch(["hi", "yo", ""])
        assert result == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_batch_extraction(self, mock_ollama):
        mock_ollama.return_value = '{"python": {"type": "technology", "context": "language"}}'
        result = extract_entities_llm_batch(["Python is a great programming language for data science"])
        assert "python" in result
class TestIsAvailable:
    """Availability probe of the local ollama HTTP endpoint."""

    @patch("llm_extractor.urllib.request.urlopen")
    def test_available(self, mock_urlopen):
        # Simulate a context-manager HTTP response with status 200.
        mock_resp = MagicMock()
        mock_resp.status = 200
        mock_resp.__enter__ = MagicMock(return_value=mock_resp)
        mock_resp.__exit__ = MagicMock(return_value=False)
        mock_urlopen.return_value = mock_resp
        assert is_available() is True

    @patch("llm_extractor.urllib.request.urlopen")
    def test_unavailable(self, mock_urlopen):
        # Any connection error maps to False, not an exception.
        mock_urlopen.side_effect = Exception("connection refused")
        assert is_available() is False

119
tests/test_loop.py Normal file
View file

@ -0,0 +1,119 @@
"""Tests for intelligence/loop.py — Darkplex Loop state machine and helpers."""
import json
import sys
import time
from datetime import datetime, timezone, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
import loop as darkplex_loop
class TestImportance:
    """Heuristic importance scoring in loop._importance."""

    def test_empty(self):
        assert darkplex_loop._importance("") == 0.0

    def test_heartbeat_low(self):
        heartbeat = "HEARTBEAT_OK all systems nominal"
        assert darkplex_loop._importance(heartbeat) < 0.2

    def test_business_content_high(self):
        business = "Meeting about the project deadline and budget milestone"
        assert darkplex_loop._importance(business) > 0.4

    def test_clamped(self):
        samples = ["", "x" * 1000, "meeting project company contract decision strategy"]
        for sample in samples:
            score = darkplex_loop._importance(sample)
            assert 0.0 <= score <= 1.0
class TestLoopState:
    """LoopState lifecycle: persistence, failure escalation, perf tracking.

    Each test patches STATE_FILE onto a tmp_path so no real state is touched.
    """

    def test_init(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            assert state.status == "INIT"
            assert state.cycle_count == 0

    def test_save_and_load(self, tmp_path):
        # A second LoopState over the same file must see the saved values.
        sf = tmp_path / "state.json"
        with patch.object(darkplex_loop, 'STATE_FILE', sf):
            state = darkplex_loop.LoopState()
            state.status = "RUNNING"
            state.cycle_count = 5
            state.save()
            state2 = darkplex_loop.LoopState()
            assert state2.status == "RUNNING"
            assert state2.cycle_count == 5

    def test_record_success(self, tmp_path):
        # Success resets the failure streak and bumps the cycle count.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_success({"test": "ok"})
            assert state.status == "RUNNING"
            assert state.consecutive_failures == 0
            assert state.cycle_count == 1

    def test_record_failure_degraded(self, tmp_path):
        # A single failure moves the loop into DEGRADED.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_failure("ingest", "timeout")
            assert state.status == "DEGRADED"
            assert state.consecutive_failures == 1

    def test_record_failure_emergency(self, tmp_path):
        # Three consecutive failures escalate to EMERGENCY.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            for i in range(3):
                state.record_failure("ingest", "timeout")
            assert state.status == "EMERGENCY"

    def test_can_alert(self, tmp_path):
        # mark_alerted() suppresses immediate re-alerting.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            assert state.can_alert()
            state.mark_alerted()
            assert not state.can_alert()

    def test_record_perf(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_perf({"total_ms": 1000, "ingest_ms": 200})
            assert state.perf["total_ms"] == 1000
            assert len(state.perf_history) == 1

    def test_perf_averages(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_perf({"total_ms": 1000})
            state.record_perf({"total_ms": 2000})
            avgs = state.perf_averages()
            assert avgs["total_ms"] == 1500

    def test_perf_history_capped(self, tmp_path):
        # History is bounded at 10 entries.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            for i in range(15):
                state.record_perf({"total_ms": i * 100})
            assert len(state.perf_history) == 10
class TestCheckNewEvents:
    """check_new_events(): pending count from the nats CLI, -1 on failure."""

    @patch("loop.subprocess.run")
    def test_returns_pending(self, mock_run):
        mock_run.return_value = MagicMock(
            returncode=0, stdout=json.dumps({"num_pending": 42})
        )
        assert darkplex_loop.check_new_events() == 42

    @patch("loop.subprocess.run")
    def test_returns_negative_on_failure(self, mock_run):
        # Non-zero exit maps to the -1 sentinel.
        mock_run.return_value = MagicMock(returncode=1, stdout="")
        assert darkplex_loop.check_new_events() == -1

    @patch("loop.subprocess.run")
    def test_handles_exception(self, mock_run):
        # A missing nats binary must not propagate the exception.
        mock_run.side_effect = Exception("nats not found")
        assert darkplex_loop.check_new_events() == -1

View file

@ -0,0 +1,72 @@
"""Tests for intelligence/shared_memory module."""
import json
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from cortex.intelligence.shared_memory import ALLOWED_AGENTS, Insight, SharedMemory
class TestInsight:
    """Construction and (de)serialisation of the Insight record."""

    def test_creation(self):
        insight = Insight(agent="claudia", topic="test", content="hello")
        assert insight.agent == "claudia"
        # timestamp is filled in automatically on construction
        assert insight.timestamp

    def test_to_json(self):
        insight = Insight(agent="claudia", topic="test", content="hello")
        payload = json.loads(insight.to_json())
        assert payload["agent"] == "claudia"
        assert payload["topic"] == "test"

    def test_from_json(self):
        original = Insight(agent="claudia", topic="test", content="hello", confidence=0.9)
        restored = Insight.from_json(original.to_json())
        assert restored.agent == original.agent
        assert restored.confidence == 0.9

    def test_default_confidence(self):
        assert Insight(agent="claudia", topic="t", content="c").confidence == 0.8

    def test_tags(self):
        tagged = Insight(agent="claudia", topic="t", content="c", tags=["a", "b"])
        assert len(tagged.tags) == 2
class TestSharedMemory:
    """Agent whitelist enforcement and not-connected guards on SharedMemory.

    Fix: the not-connected tests previously drove the coroutines with
    ``asyncio.get_event_loop().run_until_complete(...)``; calling
    get_event_loop() without a running loop is deprecated since Python 3.10
    and slated for removal. ``asyncio.run`` is the supported equivalent and
    still lets pytest.raises observe the RuntimeError from the coroutine.
    """

    def test_allowed_agent(self):
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        assert sm.agent_name == agent

    def test_disallowed_agent(self):
        # Unknown agents are rejected at construction time.
        with pytest.raises(ValueError, match="not allowed"):
            SharedMemory(agent_name="hacker_bot")

    def test_not_connected_publish(self):
        import asyncio
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        with pytest.raises(RuntimeError, match="Not connected"):
            asyncio.run(
                sm.publish(Insight(agent=agent, topic="t", content="c"))
            )

    def test_not_connected_subscribe(self):
        import asyncio
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        with pytest.raises(RuntimeError, match="Not connected"):
            asyncio.run(
                sm.subscribe("t", lambda x: None)
            )
class TestAllowedAgents:
    """Sanity checks on the ALLOWED_AGENTS whitelist."""

    def test_default_agents(self):
        # The whitelist is non-empty and always includes claudia.
        assert len(ALLOWED_AGENTS) >= 1
        assert "claudia" in ALLOWED_AGENTS

77
tests/test_temporal.py Normal file
View file

@ -0,0 +1,77 @@
"""Tests for intelligence/temporal.py — Temporal Context API."""
import sys
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
from temporal import TemporalEntry, TemporalQuery, TemporalContext
class TestTemporalEntry:
    """Field handling of the TemporalEntry record."""

    def test_creation(self):
        entry = TemporalEntry(
            timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc),
            source="nats",
            topic="ssl-cert",
            content="SSL cert expiring",
        )
        assert entry.source == "nats"
        # relevance defaults to 0.0 until a query scores the entry
        assert entry.relevance_score == 0.0

    def test_metadata(self):
        entry = TemporalEntry(
            timestamp=datetime.now(timezone.utc),
            source="chromadb",
            topic="test",
            content="test",
            metadata={"key": "value"},
            relevance_score=0.95,
        )
        assert entry.metadata["key"] == "value"
        assert entry.relevance_score == 0.95
class TestTemporalQuery:
    """Defaults and overrides for TemporalQuery."""

    def test_defaults(self):
        query = TemporalQuery(topic="test")
        assert query.limit == 50
        # Both backends are queried by default.
        for source in ("nats", "chromadb"):
            assert source in query.sources

    def test_custom(self):
        query = TemporalQuery(
            topic="ssl",
            start_time=datetime(2026, 1, 1, tzinfo=timezone.utc),
            limit=10,
            sources=["nats"],
        )
        assert query.limit == 10
        assert len(query.sources) == 1
class TestTemporalContext:
    """Construction defaults and graceful no-connection behaviour."""

    def test_init_defaults(self):
        # Both backend URLs default to localhost endpoints.
        ctx = TemporalContext()
        assert "localhost" in ctx.nats_url
        assert "localhost" in ctx.chromadb_url

    def test_init_custom(self):
        ctx = TemporalContext(nats_url="nats://custom:4222", chromadb_url="http://custom:8000")
        assert ctx.nats_url == "nats://custom:4222"

    @pytest.mark.asyncio
    async def test_query_no_connections(self):
        ctx = TemporalContext()
        # No connections established, should return empty
        result = await ctx.query(TemporalQuery(topic="test"))
        assert result == []

    @pytest.mark.asyncio
    async def test_close_no_connection(self):
        ctx = TemporalContext()
        await ctx.close()  # Should not raise