diff --git a/.gitignore b/.gitignore index 35b69ca..698f2da 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ dist/ build/ .eggs/ .pytest_cache/ +.coverage diff --git a/cortex/entity_manager.py b/cortex/entity_manager.py new file mode 100755 index 0000000..cfbf76c --- /dev/null +++ b/cortex/entity_manager.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python3 +""" +Entity Manager — File-based knowledge graph for entity extraction and relationship mapping. +Part of Level 4.4 AGI Roadmap. + +Usage: + entity-manager.py bootstrap — Bootstrap from life/areas/ + entity-manager.py extract "text" — Extract entities from text + entity-manager.py relate "A" "B" [type] — Create/update relationship + entity-manager.py query "entity" — Query relationships for entity + entity-manager.py graph — Output relationship summary +""" + +import sys +import os +import json +import re +import time +from pathlib import Path + +KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge" +ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json" +RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json" +LIFE_AREAS = Path.home() / "life" / "areas" + +# Common words to skip during entity extraction +STOP_WORDS = { + "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "could", + "should", "may", "might", "shall", "can", "need", "must", "i", "you", + "he", "she", "it", "we", "they", "me", "him", "her", "us", "them", + "my", "your", "his", "its", "our", "their", "this", "that", "these", + "those", "what", "which", "who", "whom", "where", "when", "why", "how", + "all", "each", "every", "both", "few", "more", "most", "other", "some", + "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", + "very", "just", "because", "as", "until", "while", "of", "at", "by", + "for", "with", "about", "against", "between", "through", "during", + "before", "after", "above", "below", "to", "from", "up", "down", "in", + "out", "on", "off", 
def normalize(name):
    """Canonicalize an entity name: trimmed, lower-cased, hyphen-separated."""
    cleaned = name.strip().lower()
    return cleaned.replace("_", "-")


def load_json(path):
    """Read a JSON file; a missing or corrupt file yields an empty dict."""
    try:
        with open(path) as handle:
            data = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}
    return data


def save_json(path, data):
    """Write *data* as pretty-printed JSON, creating parent dirs as needed."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
def extract_entities(text, known=None):
    """Extract entities from text using heuristics and known-entity matching.

    Args:
        text: Free-form text to scan.
        known: Optional mapping of normalized entity name -> info dict;
            defaults to the registry returned by load_known_entities().

    Returns:
        Dict of normalized entity name -> {"type": ..., "match": ...},
        where "match" records which heuristic found the entity.
    """
    if known is None:
        known = load_known_entities()

    found = {}
    text_lower = text.lower()

    # 1. Match known entities on word boundaries.
    # FIX: the previous raw substring test (`v in text_lower`) fired inside
    # unrelated words — e.g. a known entity "ann" matched "planning".
    for name, info in known.items():
        # Accept the slug itself plus space-joined and fused variants.
        variants = [name, name.replace("-", " "), name.replace("-", "")]
        for v in variants:
            if len(v) > 2 and re.search(rf"\b{re.escape(v)}\b", text_lower):
                found[name] = {"type": info.get("type", "unknown"), "match": "known"}
                break

    # 2. @mentions are assumed to be people.
    for m in re.finditer(r"@(\w+)", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS and len(name) > 2:
            found[name] = {"type": "person", "match": "mention"}

    # 3. Capitalized multi-word runs (likely proper nouns).
    for m in re.finditer(r"\b([A-Z][a-zäöüß]+(?:\s+[A-Z][a-zäöüß]+)+)\b", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS and len(name) > 3:
            # Heuristic: 2-3 words is likely a person; longer runs a topic.
            words = name.split()
            etype = "person" if len(words) <= 3 else "topic"
            found[name] = {"type": etype, "match": "capitalized"}

    # 4. Standalone capitalized words (potential entities, type unknown).
    for m in re.finditer(r"\b([A-Z][a-zäöüß]{2,})\b", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS:
            found[name] = {"type": "unknown", "match": "capitalized_single"}

    # 5. ALL-CAPS acronyms (likely companies/products), skipping common
    # technical abbreviations that are not organizations.
    for m in re.finditer(r"\b([A-Z]{2,6})\b", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS and name not in {
            "ok", "am", "pm", "gmt", "utc", "url", "api", "cli", "ssh", "dns",
            "http", "https", "json", "html", "css", "js", "ts", "py", "md",
            "id", "ui", "ux", "io", "os", "ip", "gb", "mb", "kb", "tb",
        }:
            found[name] = {"type": "organization", "match": "acronym"}

    return found
def cmd_relate(entity_a, entity_b, rel_type="related"):
    """Create or update an undirected relationship between two entities."""
    first, second = normalize(entity_a), normalize(entity_b)
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)

    # Undirected edge: order the pair so A::B and B::A share one key.
    low, high = sorted([first, second])
    key = f"{low}::{high}"
    ts = time.strftime("%Y-%m-%dT%H:%M:%S")

    existing = relationships.get(key)
    if existing is not None:
        existing["count"] = existing.get("count", 1) + 1
        existing["last_seen"] = ts
        # Only non-default types are accumulated, and only once each.
        if rel_type != "related" and rel_type not in existing.get("types", []):
            existing.setdefault("types", []).append(rel_type)
        print(f"Updated: {first} <-> {second} (seen {existing['count']}x)")
    else:
        relationships[key] = {
            "a": first, "b": second,
            "types": [rel_type],
            "count": 1,
            "first_seen": ts,
            "last_seen": ts,
        }
        print(f"Created: {first} <-> {second} ({rel_type})")

    # Both endpoints must exist in the entity registry.
    for name in (first, second):
        if name not in entities:
            entities[name] = {"type": "unknown", "source": "relationship", "first_seen": ts}

    save_json(RELATIONSHIPS_FILE, relationships)
    save_json(ENTITIES_FILE, entities)
def cmd_graph():
    """Print a summary of the knowledge graph: top hubs and recent edges."""
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)

    if not relationships:
        print("No relationships in knowledge graph.")
        return

    # Degree per entity: every edge contributes to both of its endpoints.
    degree = {}
    for rel in relationships.values():
        for endpoint in (rel["a"], rel["b"]):
            degree[endpoint] = degree.get(endpoint, 0) + 1

    ranked = sorted(degree.items(), key=lambda item: -item[1])

    print(f"Knowledge Graph: {len(entities)} entities, {len(relationships)} relationships\n")
    print("Top connected entities:")
    for name, count in ranked[:20]:
        etype = entities.get(name, {}).get("type", "?")
        print(f" {name} ({etype}): {count} connections")

    print("\nRecent relationships:")
    by_recency = sorted(
        relationships.values(),
        key=lambda rel: rel.get("last_seen", ""),
        reverse=True,
    )
    for rel in by_recency[:10]:
        types = ", ".join(rel.get("types", ["related"]))
        print(f" {rel['a']} <-> {rel['b']} [{types}]")
+ +Usage: + darkplex governance evaluate --agent --action [--data-type ] [--target ] [--role ] + darkplex governance risk --agent --action [--data-type ] [--target ] [--role ] + darkplex governance evidence [--agent ] [--verdict ] [--control ] [--json] + darkplex governance report [--agent ] [--json] [--output ] + darkplex governance policies [--reload] + darkplex governance status +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +from pathlib import Path + +# Default paths +DEFAULT_POLICIES_DIR = os.environ.get( + "GOVERNANCE_POLICIES_DIR", + str(Path(__file__).parent / "policies"), +) +DEFAULT_CONTROLS_MAPPING = os.environ.get( + "GOVERNANCE_CONTROLS_MAPPING", + str(Path(__file__).parent / "controls" / "iso27001-mapping.yaml"), +) + + +def _build_context(args: argparse.Namespace) -> dict: + """Build an evaluation context from CLI args.""" + ctx = {} + if args.agent: + ctx["agent"] = args.agent + if args.action: + ctx["action"] = args.action + if args.data_type: + ctx["data_type"] = args.data_type + if args.target: + ctx["target"] = args.target + if args.role: + ctx["agent_role"] = args.role + return ctx + + +def _get_engine(): + from governance.policy import PolicyEngine + return PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR) + + +def _get_scorer(): + from governance.risk_scorer import RiskScorer + return RiskScorer() + + +def _get_enforcer(): + from governance.enforcer import Enforcer + from governance.policy import PolicyEngine + from governance.risk_scorer import RiskScorer + from governance.evidence import EvidenceCollector, ControlMapping + + return Enforcer( + policy_engine=PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR), + risk_scorer=RiskScorer(), + evidence_collector=EvidenceCollector( + control_mapping=ControlMapping(DEFAULT_CONTROLS_MAPPING) + ), + ) + + +def cmd_evaluate(args: argparse.Namespace) -> None: + """Full governance evaluation: policy + risk + evidence.""" + enforcer = _get_enforcer() + ctx = 
def cmd_policies(args: argparse.Namespace) -> None:
    """List every loaded policy with its version, description, and rules."""
    engine = _get_engine()
    policies = engine.policies
    if not policies:
        print("No policies loaded.")
        return
    for policy in policies:
        header = [
            f"📋 {policy.name} (v{policy.version})",
            f" {policy.description}",
            f" Rules: {len(policy.rules)}",
        ]
        for line in header:
            print(line)
        for rule in policy.rules:
            print(f" • {rule.name} → {rule.effect} (priority: {rule.priority})")
        print()
def cmd_report(args: argparse.Namespace) -> None:
    """Generate compliance report (placeholder — needs live evidence)."""
    from governance.evidence import EvidenceCollector, ControlMapping
    from governance.report_generator import ReportGenerator

    mapping = ControlMapping(DEFAULT_CONTROLS_MAPPING)
    generator = ReportGenerator(EvidenceCollector(control_mapping=mapping))

    # Agent-scoped report when --agent was given, full report otherwise.
    report = (
        generator.generate_agent_report(args.agent)
        if args.agent
        else generator.generate_compliance_report()
    )

    output = json.dumps(report, indent=2)
    if not args.output:
        print(output)
        return
    Path(args.output).write_text(output)
    print(f"✅ Report written to {args.output}")
p_eval.add_argument("--json", action="store_true", dest="json") + + # risk + p_risk = sub.add_parser("risk", help="Risk scoring only") + p_risk.add_argument("--agent", default="unknown") + p_risk.add_argument("--action", default="unknown") + p_risk.add_argument("--data-type", default="public", choices=["public", "internal", "confidential", "restricted"]) + p_risk.add_argument("--target", default="internal", choices=["internal", "external"]) + p_risk.add_argument("--role", default="assistant", choices=["admin", "operator", "assistant", "external"]) + p_risk.add_argument("--json", action="store_true", dest="json") + + # policies + p_pol = sub.add_parser("policies", help="List loaded policies") + p_pol.add_argument("--reload", action="store_true") + + # status + sub.add_parser("status", help="Show governance status") + + # report + p_rep = sub.add_parser("report", help="Generate compliance report") + p_rep.add_argument("--agent", default=None) + p_rep.add_argument("--output", "-o", default=None) + p_rep.add_argument("--json", action="store_true", dest="json") + + args = parser.parse_args() + + if args.subcmd in ("evaluate", "eval"): + cmd_evaluate(args) + elif args.subcmd == "risk": + cmd_risk(args) + elif args.subcmd == "policies": + cmd_policies(args) + elif args.subcmd == "status": + cmd_status(args) + elif args.subcmd == "report": + cmd_report(args) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/cortex/governance/controls/iso27001-mapping.yaml b/cortex/governance/controls/iso27001-mapping.yaml new file mode 100644 index 0000000..1bafb02 --- /dev/null +++ b/cortex/governance/controls/iso27001-mapping.yaml @@ -0,0 +1,70 @@ +# ISO 27001 Annex A Controls → NATS Event Streams Mapping +# +# Maps governance event types to ISO 27001:2022 Annex A controls. +# Used by the Evidence Collector to tag evidence with applicable controls. 
+ +version: "1.0.0" + +mappings: + # A.5 — Organizational Controls (Information Security Policies) + - controls: + - "A.5.1" # Policies for information security + - "A.5.2" # Information security roles and responsibilities + - "A.5.4" # Management responsibilities + event_types: + - policy_evaluation + - policy_update + - policy_violation + nats_subjects: + - "governance.policy.>" + + # A.5.10-12 — Acceptable use, return, classification + - controls: + - "A.5.10" # Acceptable use of information + - "A.5.12" # Classification of information + - "A.5.13" # Labelling of information + event_types: + - data_access + - data_classification + - data_export + nats_subjects: + - "governance.data.>" + + # A.8 — Technology Controls (Asset Management) + - controls: + - "A.8.1" # User endpoint devices + - "A.8.2" # Privileged access rights + - "A.8.5" # Secure authentication + - "A.8.9" # Configuration management + - "A.8.16" # Monitoring activities + event_types: + - agent_authentication + - agent_action + - system_configuration + - monitoring_alert + nats_subjects: + - "governance.agent.>" + - "governance.system.>" + + # A.9 — Access Control + - controls: + - "A.5.15" # Access control + - "A.5.16" # Identity management + - "A.5.17" # Authentication information + - "A.5.18" # Access rights + event_types: + - access_request + - access_granted + - access_denied + - role_change + nats_subjects: + - "governance.access.>" + + # A.5.23-25 — Supplier/Cloud + - controls: + - "A.5.23" # Information security for cloud services + event_types: + - external_api_call + - cloud_service_access + nats_subjects: + - "governance.external.>" diff --git a/cortex/governance/enforcer.py b/cortex/governance/enforcer.py new file mode 100644 index 0000000..8350fe3 --- /dev/null +++ b/cortex/governance/enforcer.py @@ -0,0 +1,129 @@ +"""Runtime Enforcer: pre-execution policy check (approve/deny/escalate). + +The enforcer is the single entry point for all agent action governance. 
@dataclass
class Decision:
    """The final governance decision for an agent action."""

    verdict: str  # "approve", "deny", "escalate"
    reason: str
    # Full risk-scoring result that backed (or overrode) the verdict.
    risk: RiskResult
    # Raw output of PolicyEngine.evaluate() for auditability.
    policy_result: dict[str, Any]

    @property
    def approved(self) -> bool:
        # NOTE(review): compares against "approve", but Enforcer.evaluate's
        # strictness table below uses "allow" for the permissive verdict.
        # If PolicyEngine emits "allow", this property is False even for a
        # permitted action — confirm the verdict vocabulary is consistent.
        return self.verdict == "approve"


class Enforcer:
    """Pre-execution governance enforcer.

    Evaluates every agent action against policies and risk scoring,
    records evidence, and returns a decision.

    Usage:
        enforcer = Enforcer(policy_engine, risk_scorer, evidence_collector)
        decision = enforcer.evaluate({"agent": "claudia", "action": "send_email", ...})
        if decision.approved:
            execute_action()
    """

    # Risk levels that override policy to deny/escalate
    RISK_OVERRIDES: dict[str, str] = {
        "critical": "deny",
        "high": "escalate",
    }

    def __init__(
        self,
        policy_engine: PolicyEngine | None = None,
        risk_scorer: RiskScorer | None = None,
        evidence_collector: EvidenceCollector | None = None,
    ) -> None:
        # Each collaborator falls back to its default-constructed instance,
        # so Enforcer() works standalone with default configuration.
        self.policy_engine = policy_engine or PolicyEngine()
        self.risk_scorer = risk_scorer or RiskScorer()
        self.evidence_collector = evidence_collector or EvidenceCollector()

    def evaluate(self, context: dict[str, Any]) -> Decision:
        """Evaluate an agent action and return a governance decision.

        Args:
            context: Action context dict with keys like:
                - agent: agent identifier
                - action: action name
                - data_type / data_classification: data sensitivity
                - target: internal/external
                - agent_role: role of the requesting agent
                - hour: time of day (optional)

        Returns:
            Decision with verdict, reason, risk score, and policy result.
        """
        # Normalize data_type
        # NOTE: this mutates the caller's dict in place (adds "data_type").
        if "data_classification" in context and "data_type" not in context:
            context["data_type"] = context["data_classification"]

        # Step 1: Risk scoring
        risk = self.risk_scorer.score(context)

        # Step 2: Policy evaluation
        policy_result = self.policy_engine.evaluate(context)
        policy_verdict = policy_result["verdict"]

        # Step 3: Combine — risk can override policy to be MORE restrictive
        verdict = policy_verdict
        reason = policy_result["reason"]

        risk_override = self.RISK_OVERRIDES.get(risk.level)
        if risk_override:
            # Lower number = stricter; an unknown verdict defaults to the
            # most permissive (2), so the override applies only when it is
            # strictly stricter than what the policy decided.
            strictness = {"deny": 0, "escalate": 1, "allow": 2}
            if strictness.get(risk_override, 2) < strictness.get(verdict, 2):
                verdict = risk_override
                reason = f"Risk override ({risk.level}): {reason}"

        # Step 4: Record evidence
        agent = context.get("agent", "unknown")
        action = context.get("action", "unknown")
        self.evidence_collector.record(
            event_type="policy_evaluation",
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk.value,
            risk_level=risk.level,
            details={
                "context": context,
                "policy_result": policy_result,
                "risk_factors": risk.factors,
            },
        )

        decision = Decision(
            verdict=verdict,
            reason=reason,
            risk=risk,
            policy_result=policy_result,
        )

        logger.info(
            "Enforcer decision: %s → %s (risk: %d/%s)",
            f"{agent}/{action}", verdict, risk.value, risk.level,
        )

        return decision
@dataclass
class EvidenceRecord:
    """One immutable unit of compliance evidence for audit reporting."""

    timestamp: str
    event_type: str
    agent: str
    action: str
    verdict: str
    risk_score: int
    risk_level: str
    controls: list[str]  # ISO 27001 control IDs
    details: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this record into a plain, JSON-ready dict."""
        payload = dict(
            timestamp=self.timestamp,
            event_type=self.event_type,
            agent=self.agent,
            action=self.action,
            verdict=self.verdict,
            risk_score=self.risk_score,
            risk_level=self.risk_level,
            controls=self.controls,
            details=self.details,
        )
        return payload
class EvidenceCollector:
    """Collects and stores governance evidence from agent actions.

    In production, this subscribes to NATS JetStream. For testing,
    evidence can be recorded directly via record().

    Usage:
        collector = EvidenceCollector()
        collector.record(event_type="policy_evaluation", agent="claudia", ...)
    """

    def __init__(self, control_mapping: ControlMapping | None = None) -> None:
        # Fall back to the default mapping file when none is injected.
        self.control_mapping = control_mapping or ControlMapping()
        self.evidence: list[EvidenceRecord] = []

    def record(
        self,
        event_type: str,
        agent: str,
        action: str,
        verdict: str,
        risk_score: int = 0,
        risk_level: str = "low",
        details: dict[str, Any] | None = None,
    ) -> EvidenceRecord:
        """Record one governance evidence entry and return it.

        Args:
            event_type: Type of governance event (e.g., policy_evaluation, access_request)
            agent: Agent identifier
            action: Action being performed
            verdict: Policy verdict (allow/deny/escalate)
            risk_score: Numeric risk score (0-10)
            risk_level: Risk level string
            details: Additional context
        """
        entry = EvidenceRecord(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event_type=event_type,
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk_score,
            risk_level=risk_level,
            # Tag the entry with every ISO 27001 control this event maps to.
            controls=self.control_mapping.get_controls(event_type),
            details=details or {},
        )

        self.evidence.append(entry)
        logger.info(
            "Evidence recorded: %s by %s → %s (risk: %d/%s, controls: %s)",
            action, agent, verdict, risk_score, risk_level, entry.controls,
        )
        return entry

    def get_evidence(
        self,
        agent: str | None = None,
        control: str | None = None,
        verdict: str | None = None,
    ) -> list[EvidenceRecord]:
        """Query evidence, narrowing by each filter that is provided."""
        matches = self.evidence
        if agent:
            matches = [rec for rec in matches if rec.agent == agent]
        if control:
            matches = [rec for rec in matches if control in rec.controls]
        if verdict:
            matches = [rec for rec in matches if rec.verdict == verdict]
        return matches

    def export_json(self) -> str:
        """Export all recorded evidence as a pretty-printed JSON array."""
        serialized = [rec.to_dict() for rec in self.evidence]
        return json.dumps(serialized, indent=2)
+ +name: external-comms +description: Governs agent communication with external systems and parties +version: "1.0.0" + +rules: + - name: deny-assistant-external-email + description: Assistants cannot send external emails without escalation + conditions: + agent_role: assistant + action: send_email + target: external + effect: escalate + priority: 80 + + - name: allow-operator-external + description: Operators may communicate externally + conditions: + agent_role: operator + target: external + effect: allow + priority: 70 + + - name: deny-external-api-restricted + description: No external API calls with restricted data + conditions: + action: api_call + target: external + data_type: restricted + effect: deny + priority: 100 + + - name: allow-internal-comms + description: Internal communication is always allowed + conditions: + target: internal + effect: allow + priority: 10 diff --git a/cortex/governance/policies/financial-data.yaml b/cortex/governance/policies/financial-data.yaml new file mode 100644 index 0000000..9e3ede4 --- /dev/null +++ b/cortex/governance/policies/financial-data.yaml @@ -0,0 +1,42 @@ +# Financial Data Policy (BaFin-relevant) +# Strict controls for financial data handling. 
+ +name: financial-data +description: BaFin-compliant financial data governance +version: "1.0.0" + +rules: + - name: deny-financial-external + description: Financial data must not leave internal systems + conditions: + data_type: restricted + action: export_data + target: external + effect: deny + priority: 100 + + - name: escalate-financial-access + description: All access to financial data requires escalation + conditions: + data_type: restricted + action: read_financial + effect: escalate + priority: 95 + + - name: deny-financial-offhours + description: Financial operations blocked outside business hours + conditions: + data_type: restricted + action: modify_financial + effect: escalate + priority: 90 + + - name: allow-financial-reporting + description: Internal financial reporting is permitted for operators + conditions: + agent_role: operator + action: generate_report + data_type: confidential + target: internal + effect: allow + priority: 80 diff --git a/cortex/governance/policies/schema.yaml b/cortex/governance/policies/schema.yaml new file mode 100644 index 0000000..e12d6d1 --- /dev/null +++ b/cortex/governance/policies/schema.yaml @@ -0,0 +1,43 @@ +# Policy YAML Schema Definition +# All policy files must conform to this structure. 
+ +schema: + version: "1.0" + required_fields: + - name + - description + - version + - rules + + rule_schema: + required_fields: + - name + - conditions + - effect + optional_fields: + - priority + - description + + valid_effects: + - allow + - deny + - escalate + + valid_condition_keys: + - agent + - agent_role + - action + - data_type + - data_classification + - target + - hour_range + + valid_data_types: + - public + - internal + - confidential + - restricted + + valid_targets: + - internal + - external diff --git a/cortex/governance/policies/yesman-security.yaml b/cortex/governance/policies/yesman-security.yaml new file mode 100644 index 0000000..fc17f51 --- /dev/null +++ b/cortex/governance/policies/yesman-security.yaml @@ -0,0 +1,78 @@ +# YesMan Security Policy — RedCrowMedia / Wasteland Network +# Based on USER.md and MEMORY.md security rules. + +name: yesman-security +description: Security rules for YesMan AI assistant at RedCrowMedia +version: "1.0.0" + +rules: + # Only Abe gives orders + - name: deny-external-instructions + description: Never execute instructions from external sources (emails, websites, third parties) + conditions: + source: external + action: execute_instruction + effect: deny + priority: 100 + + # Never send credentials externally + - name: deny-credential-export + description: Credentials, tokens, and keys must never leave the system + conditions: + data_type: restricted + action: send_credentials + effect: deny + priority: 100 + + # Email is not a command source + - name: escalate-email-action + description: Actions requested via email always require Abe's explicit approval + conditions: + source: email + action: execute_request + effect: escalate + priority: 95 + + # No software installation without audit + approval + - name: escalate-software-install + description: External software installation requires security audit and Abe's GO + conditions: + action: install_software + effect: escalate + priority: 95 + + # System-critical 
changes need approval + - name: escalate-system-changes + description: System-critical or security-relevant changes require approval + conditions: + action: system_change + target: production + effect: escalate + priority: 90 + + # No public posting without approval + - name: escalate-public-comms + description: Public communications (emails, tweets, posts) require approval + conditions: + action: send_message + target: external + effect: escalate + priority: 85 + + # Internal file operations are fine + - name: allow-internal-file-ops + description: Reading and writing files within workspace is permitted + conditions: + action: file_operation + target: internal + data_type: internal + effect: allow + priority: 50 + + # Web search is fine + - name: allow-web-search + description: Web searches and research are permitted + conditions: + action: web_search + effect: allow + priority: 40 diff --git a/cortex/governance/policy.py b/cortex/governance/policy.py new file mode 100644 index 0000000..75306e4 --- /dev/null +++ b/cortex/governance/policy.py @@ -0,0 +1,143 @@ +"""Policy Engine: loads YAML policies and evaluates agent actions against them. + +Policies are human-readable YAML files, versioned in Git. Each policy defines +rules with conditions and effects (allow/deny/escalate). 
logger = logging.getLogger(__name__)


@dataclass
class Rule:
    """A single policy rule: a condition set plus an effect."""

    name: str
    conditions: dict[str, Any]
    effect: str  # "allow", "deny", "escalate"
    priority: int = 0

    def matches(self, context: dict[str, Any]) -> bool:
        """Return True iff every condition is satisfied by *context*."""

        def satisfied(key: str, wanted: Any) -> bool:
            found = context.get(key)
            if found is None:
                # A missing context key can never satisfy a condition.
                return False
            if isinstance(wanted, list):
                return found in wanted
            return found == wanted

        return all(satisfied(k, w) for k, w in self.conditions.items())


@dataclass
class Policy:
    """A named policy: metadata plus an ordered list of rules."""

    name: str
    description: str
    version: str
    rules: list[Rule] = field(default_factory=list)


class PolicyEngine:
    """Loads and evaluates YAML-based governance policies.

    Usage:
        engine = PolicyEngine(policies_dir="policies/")
        result = engine.evaluate(action_context)
    """

    def __init__(self, policies_dir: str | None = None) -> None:
        fallback = os.environ.get("GOVERNANCE_POLICIES_DIR", "policies/")
        self.policies_dir = Path(policies_dir if policies_dir else fallback)
        self.policies: list[Policy] = []
        self._load_policies()

    def _load_policies(self) -> None:
        """Read every *.yaml policy file under the policies directory."""
        if not self.policies_dir.exists():
            logger.warning("Policies directory not found: %s", self.policies_dir)
            return

        for yaml_path in sorted(self.policies_dir.glob("*.yaml")):
            # schema.yaml documents the format; it is not itself a policy.
            if yaml_path.name == "schema.yaml":
                continue
            try:
                loaded = self._parse_policy(yaml_path)
            except Exception:
                logger.exception("Failed to load policy: %s", yaml_path)
                continue
            self.policies.append(loaded)
            logger.info("Loaded policy: %s (%d rules)", loaded.name, len(loaded.rules))

    def _parse_policy(self, path: Path) -> Policy:
        """Parse one YAML file into a Policy object."""
        with open(path, "r") as handle:
            raw = yaml.safe_load(handle)

        parsed_rules = [
            Rule(
                name=entry["name"],
                conditions=entry.get("conditions", {}),
                effect=entry.get("effect", "deny"),
                priority=entry.get("priority", 0),
            )
            for entry in raw.get("rules", [])
        ]
        return Policy(
            name=raw.get("name", path.stem),
            description=raw.get("description", ""),
            version=raw.get("version", "1.0.0"),
            rules=parsed_rules,
        )

    def evaluate(self, context: dict[str, Any]) -> dict[str, Any]:
        """Evaluate an action context against every loaded policy.

        Returns the highest-priority matching rule's effect, with all
        matched rules listed strongest-first; 'allow' when nothing matches.
        """
        hits: list[tuple[Rule, Policy]] = [
            (rule, policy)
            for policy in self.policies
            for rule in policy.rules
            if rule.matches(context)
        ]

        if not hits:
            return {
                "verdict": "allow",
                "reason": "No matching policy rules",
                "matched_rules": [],
            }

        # Highest priority wins; ties broken by strictness (deny > escalate > allow).
        strictness = {"deny": 0, "escalate": 1, "allow": 2}
        hits.sort(key=lambda hit: (-hit[0].priority, strictness.get(hit[0].effect, 2)))

        winner_rule, winner_policy = hits[0]
        return {
            "verdict": winner_rule.effect,
            "reason": f"Policy '{winner_policy.name}', rule '{winner_rule.name}'",
            "matched_rules": [
                {"policy": pol.name, "rule": rl.name, "effect": rl.effect}
                for rl, pol in hits
            ],
        }

    def reload(self) -> None:
        """Re-read all policies from disk, discarding the cached set in place."""
        del self.policies[:]
        self._load_policies()
+ + Usage: + generator = ReportGenerator(evidence_collector) + report = generator.generate_compliance_report() + """ + + def __init__(self, evidence_collector: EvidenceCollector) -> None: + self.collector = evidence_collector + + def generate_compliance_report(self) -> dict[str, Any]: + """Generate a full compliance report grouped by ISO 27001 controls.""" + evidence = self.collector.evidence + if not evidence: + return {"generated_at": _now_iso(), "status": "no_evidence", "sections": []} + + by_control: dict[str, list[EvidenceRecord]] = defaultdict(list) + for record in evidence: + for control in record.controls: + by_control[control].append(record) + + sections = [] + for control_id in sorted(by_control.keys()): + records = by_control[control_id] + sections.append({ + "control": control_id, + "total_events": len(records), + "verdicts": _count_verdicts(records), + "risk_distribution": _count_risk_levels(records), + "agents": list({r.agent for r in records}), + }) + + return { + "generated_at": _now_iso(), + "total_evidence": len(evidence), + "controls_covered": list(sorted(by_control.keys())), + "summary": { + "total_deny": sum(1 for e in evidence if e.verdict == "deny"), + "total_escalate": sum(1 for e in evidence if e.verdict == "escalate"), + "total_allow": sum(1 for e in evidence if e.verdict == "allow"), + "high_risk_events": sum(1 for e in evidence if e.risk_score >= 7), + }, + "sections": sections, + } + + def generate_agent_report(self, agent: str) -> dict[str, Any]: + """Generate a report for a specific agent.""" + evidence = self.collector.get_evidence(agent=agent) + return { + "generated_at": _now_iso(), + "agent": agent, + "total_actions": len(evidence), + "verdicts": _count_verdicts(evidence), + "risk_distribution": _count_risk_levels(evidence), + "actions": [e.to_dict() for e in evidence], + } + + def export_json(self) -> str: + """Export the compliance report as formatted JSON.""" + report = self.generate_compliance_report() + return 
json.dumps(report, indent=2) + + +def _now_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _count_verdicts(records: list[EvidenceRecord]) -> dict[str, int]: + counts: dict[str, int] = defaultdict(int) + for r in records: + counts[r.verdict] += 1 + return dict(counts) + + +def _count_risk_levels(records: list[EvidenceRecord]) -> dict[str, int]: + counts: dict[str, int] = defaultdict(int) + for r in records: + counts[r.risk_level] += 1 + return dict(counts) diff --git a/cortex/governance/risk_scorer.py b/cortex/governance/risk_scorer.py new file mode 100644 index 0000000..1e8f5c4 --- /dev/null +++ b/cortex/governance/risk_scorer.py @@ -0,0 +1,126 @@ +"""Risk Scorer: context-based risk scoring for agent actions. + +Risk levels: + - low (0-3): routine operations + - elevated (4-6): notable but acceptable + - high (7-8): requires escalation + - critical (9-10): auto-deny + alert + +Factors: data classification, target (internal/external), agent role, time of day. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any + +logger = logging.getLogger(__name__) + +# Data classification weights +DATA_WEIGHTS: dict[str, int] = { + "public": 0, + "internal": 2, + "confidential": 5, + "restricted": 8, +} + +# Target weights +TARGET_WEIGHTS: dict[str, int] = { + "internal": 0, + "external": 3, +} + +# Agent role weights (lower = more trusted) +ROLE_WEIGHTS: dict[str, int] = { + "admin": -1, + "operator": 0, + "assistant": 1, + "external": 3, +} + +# Off-hours bonus (outside 8-18) +OFF_HOURS_BONUS = 2 + + +@dataclass +class RiskResult: + """Result of a risk assessment.""" + + value: int + level: str + factors: dict[str, Any] + + @property + def is_acceptable(self) -> bool: + return self.value <= 6 + + +def _classify_level(score: int) -> str: + """Map a numeric score to a risk level.""" + if score <= 3: + return "low" + elif score <= 6: + return 
"elevated" + elif score <= 8: + return "high" + else: + return "critical" + + +class RiskScorer: + """Calculates contextual risk scores for agent actions. + + Usage: + scorer = RiskScorer() + result = scorer.score({"data_type": "confidential", "target": "external"}) + """ + + def score(self, context: dict[str, Any]) -> RiskResult: + """Score an action context and return a RiskResult. + + Args: + context: Dict with optional keys: + - data_type: public|internal|confidential|restricted + - target: internal|external + - agent_role: admin|operator|assistant|external + - hour: 0-23 (defaults to current hour UTC) + """ + factors: dict[str, Any] = {} + total = 0 + + # Data classification + data_type = context.get("data_type", "public") + data_score = DATA_WEIGHTS.get(data_type, 0) + factors["data_type"] = {"value": data_type, "score": data_score} + total += data_score + + # Target + target = context.get("target", "internal") + target_score = TARGET_WEIGHTS.get(target, 0) + factors["target"] = {"value": target, "score": target_score} + total += target_score + + # Agent role + role = context.get("agent_role", "assistant") + role_score = ROLE_WEIGHTS.get(role, 1) + factors["agent_role"] = {"value": role, "score": role_score} + total += role_score + + # Time of day + hour = context.get("hour") + if hour is None: + hour = datetime.now(timezone.utc).hour + is_off_hours = hour < 8 or hour >= 18 + time_score = OFF_HOURS_BONUS if is_off_hours else 0 + factors["time_of_day"] = {"hour": hour, "off_hours": is_off_hours, "score": time_score} + total += time_score + + # Clamp to 0-10 + total = max(0, min(10, total)) + + level = _classify_level(total) + logger.debug("Risk score: %d (%s) — factors: %s", total, level, factors) + + return RiskResult(value=total, level=level, factors=factors) diff --git a/cortex/intelligence/__init__.py b/cortex/intelligence/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cortex/intelligence/anticipator.py 
b/cortex/intelligence/anticipator.py new file mode 100644 index 0000000..cd3ab51 --- /dev/null +++ b/cortex/intelligence/anticipator.py @@ -0,0 +1,193 @@ +"""Proactive Intelligence: pattern-based predictions and anticipation. + +Detects patterns in historical events and generates proactive alerts: +- SSL certificate expiry approaching +- Recurring issues (same error pattern at predictable intervals) +- Usage pattern anomalies +- Resource exhaustion trends +""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass, field +from datetime import datetime, timedelta, timezone +from enum import Enum +from typing import Any, Callable + +logger = logging.getLogger(__name__) + + +class AlertSeverity(Enum): + INFO = "info" + WARNING = "warning" + CRITICAL = "critical" + + +@dataclass +class Prediction: + """A proactive prediction about a future event.""" + + pattern_name: str + description: str + severity: AlertSeverity + predicted_time: datetime | None = None + confidence: float = 0.0 # 0.0-1.0 + recommended_action: str = "" + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class PatternDefinition: + """Definition of a detectable pattern.""" + + name: str + description: str + detector: Callable[[list[dict[str, Any]]], Prediction | None] + + +class Anticipator: + """Proactive intelligence engine that detects patterns and generates predictions. 
+ + Usage: + anticipator = Anticipator() + anticipator.register_pattern(ssl_expiry_pattern) + predictions = anticipator.analyze(events) + """ + + def __init__(self) -> None: + self.patterns: list[PatternDefinition] = [] + self._register_builtin_patterns() + + def register_pattern(self, pattern: PatternDefinition) -> None: + """Register a new pattern detector.""" + self.patterns.append(pattern) + logger.info("Registered pattern: %s", pattern.name) + + def analyze(self, events: list[dict[str, Any]]) -> list[Prediction]: + """Analyze events against all registered patterns. + + Args: + events: List of event dicts with at minimum 'timestamp', 'type', 'data'. + + Returns: + List of predictions, sorted by severity (critical first). + """ + predictions: list[Prediction] = [] + + for pattern in self.patterns: + try: + prediction = pattern.detector(events) + if prediction: + predictions.append(prediction) + logger.info( + "Pattern detected: %s (severity: %s, confidence: %.2f)", + prediction.pattern_name, + prediction.severity.value, + prediction.confidence, + ) + except Exception: + logger.exception("Pattern detector failed: %s", pattern.name) + + # Sort: critical first, then by confidence + severity_order = {AlertSeverity.CRITICAL: 0, AlertSeverity.WARNING: 1, AlertSeverity.INFO: 2} + predictions.sort(key=lambda p: (severity_order.get(p.severity, 3), -p.confidence)) + + return predictions + + def _register_builtin_patterns(self) -> None: + """Register built-in pattern detectors.""" + self.register_pattern(PatternDefinition( + name="ssl_cert_expiry", + description="Detects SSL certificates approaching expiry", + detector=_detect_ssl_expiry, + )) + self.register_pattern(PatternDefinition( + name="recurring_error", + description="Detects recurring error patterns", + detector=_detect_recurring_errors, + )) + self.register_pattern(PatternDefinition( + name="usage_spike", + description="Detects unusual usage spikes", + detector=_detect_usage_spike, + )) + + +def 
_detect_ssl_expiry(events: list[dict[str, Any]]) -> Prediction | None: + """Detect SSL certificates that will expire within 14 days.""" + now = datetime.now(timezone.utc) + threshold = timedelta(days=14) + + for event in events: + if event.get("type") != "ssl_cert_check": + continue + expiry_str = event.get("data", {}).get("expiry") + if not expiry_str: + continue + try: + expiry = datetime.fromisoformat(expiry_str) + if expiry.tzinfo is None: + expiry = expiry.replace(tzinfo=timezone.utc) + except (ValueError, TypeError): + continue + + remaining = expiry - now + if remaining < threshold: + domain = event.get("data", {}).get("domain", "unknown") + severity = AlertSeverity.CRITICAL if remaining.days < 3 else AlertSeverity.WARNING + return Prediction( + pattern_name="ssl_cert_expiry", + description=f"SSL certificate for {domain} expires in {remaining.days} days", + severity=severity, + predicted_time=expiry, + confidence=0.95, + recommended_action=f"Renew SSL certificate for {domain}", + metadata={"domain": domain, "days_remaining": remaining.days}, + ) + return None + + +def _detect_recurring_errors(events: list[dict[str, Any]]) -> Prediction | None: + """Detect recurring error patterns (same error type appearing 3+ times).""" + error_counts: dict[str, int] = {} + for event in events: + if event.get("type") == "error": + error_key = event.get("data", {}).get("error_type", "unknown") + error_counts[error_key] = error_counts.get(error_key, 0) + 1 + + for error_type, count in error_counts.items(): + if count >= 3: + return Prediction( + pattern_name="recurring_error", + description=f"Recurring error '{error_type}' detected ({count} occurrences)", + severity=AlertSeverity.WARNING, + confidence=min(0.5 + count * 0.1, 0.95), + recommended_action=f"Investigate root cause of '{error_type}'", + metadata={"error_type": error_type, "count": count}, + ) + return None + + +def _detect_usage_spike(events: list[dict[str, Any]]) -> Prediction | None: + """Detect unusual usage 
spikes (>2x average in recent window).""" + usage_events = [e for e in events if e.get("type") == "usage_metric"] + if len(usage_events) < 10: + return None + + values = [e.get("data", {}).get("value", 0) for e in usage_events] + avg = sum(values) / len(values) + recent = values[-3:] if len(values) >= 3 else values + recent_avg = sum(recent) / len(recent) if recent else 0 + + if avg > 0 and recent_avg > avg * 2: + return Prediction( + pattern_name="usage_spike", + description=f"Usage spike detected: recent avg {recent_avg:.1f} vs overall {avg:.1f}", + severity=AlertSeverity.WARNING, + confidence=0.7, + recommended_action="Investigate usage spike — potential anomaly or load increase", + metadata={"average": avg, "recent_average": recent_avg, "ratio": recent_avg / avg}, + ) + return None diff --git a/cortex/intelligence/collective.py b/cortex/intelligence/collective.py new file mode 100644 index 0000000..32dc7a5 --- /dev/null +++ b/cortex/intelligence/collective.py @@ -0,0 +1,154 @@ +"""Collective Learning: aggregates patterns across all internal agents. + +Subscribes to the shared memory bus, collects insights from all +Vainplex-internal agents, and builds an aggregated knowledge base +for pattern detection and cross-agent learning. + +🚨 STRICT DATA ISOLATION: Only Vainplex-internal agents participate. +No customer data. No customer agent insights. Ever. 
logger = logging.getLogger(__name__)


@dataclass
class AggregatedPattern:
    """A pattern detected across multiple agents."""

    topic: str
    description: str
    contributing_agents: list[str]
    confidence: float
    occurrence_count: int
    first_seen: str
    last_seen: str
    insights: list[Insight] = field(default_factory=list)


class CollectiveLearning:
    """Aggregates patterns from all internal agents into collective knowledge.

    Usage:
        collective = CollectiveLearning(shared_memory)
        await collective.start()
        patterns = collective.get_patterns()

    ⚠️ DATA ISOLATION: Only processes insights from ALLOWED_AGENTS.
    """

    def __init__(self, shared_memory: SharedMemory) -> None:
        self.shared_memory = shared_memory
        self._insights_by_topic: dict[str, list[Insight]] = defaultdict(list)
        self._patterns: list[AggregatedPattern] = []

    async def start(self) -> None:
        """Subscribe to every topic on the shared memory bus."""
        await self.shared_memory.subscribe(">", self._handle_insight)
        logger.info("Collective learning started — listening for insights")

    async def _handle_insight(self, insight: Insight) -> None:
        """Store one incoming insight and refresh the pattern set."""
        # Double-check data isolation: internal agents only.
        if insight.agent not in ALLOWED_AGENTS:
            logger.warning("Rejected insight from non-internal agent: %s", insight.agent)
            return

        self._insights_by_topic[insight.topic].append(insight)
        logger.debug(
            "Collected insight: %s from %s (topic: %s)",
            insight.content[:60], insight.agent, insight.topic,
        )

        # Re-analyze patterns when new data arrives.
        self._detect_patterns()

    def _detect_patterns(self) -> None:
        """Rebuild the pattern list from everything collected so far."""
        detected: list[AggregatedPattern] = []

        for topic, collected in self._insights_by_topic.items():
            if len(collected) < 2:
                continue

            contributors = list({entry.agent for entry in collected})
            if len(contributors) < 2:
                # Single-agent observations aren't "collective" patterns.
                continue

            ordered_times = sorted(entry.timestamp for entry in collected)
            mean_conf = sum(entry.confidence for entry in collected) / len(collected)

            detected.append(AggregatedPattern(
                topic=topic,
                description=f"Cross-agent pattern on '{topic}' observed by {', '.join(contributors)}",
                contributing_agents=contributors,
                confidence=mean_conf,
                occurrence_count=len(collected),
                first_seen=ordered_times[0],
                last_seen=ordered_times[-1],
                insights=collected,
            ))

        self._patterns = detected

    def get_patterns(
        self,
        topic: str | None = None,
        min_confidence: float = 0.0,
    ) -> list[AggregatedPattern]:
        """Return detected collective patterns.

        Args:
            topic: Restrict to one topic (optional).
            min_confidence: Drop patterns below this confidence.
        """
        found = self._patterns
        if topic:
            found = [pat for pat in found if pat.topic == topic]
        if min_confidence > 0:
            found = [pat for pat in found if pat.confidence >= min_confidence]
        return found

    def get_topic_summary(self) -> dict[str, Any]:
        """Per-topic insight counts, contributing agents, and newest timestamp."""
        summary: dict[str, Any] = {}
        for topic, collected in self._insights_by_topic.items():
            summary[topic] = {
                "count": len(collected),
                "agents": list({entry.agent for entry in collected}),
                "latest": max(entry.timestamp for entry in collected) if collected else None,
            }
        return summary

    def export_knowledge(self) -> str:
        """Dump the collective knowledge base as pretty-printed JSON."""
        snapshot = {
            "exported_at": datetime.now(timezone.utc).isoformat(),
            "allowed_agents": sorted(ALLOWED_AGENTS),
            "patterns": [
                {
                    "topic": pat.topic,
                    "description": pat.description,
                    "contributing_agents": pat.contributing_agents,
                    "confidence": pat.confidence,
                    "occurrence_count": pat.occurrence_count,
                    "first_seen": pat.first_seen,
                    "last_seen": pat.last_seen,
                }
                for pat in self._patterns
            ],
            "topics": self.get_topic_summary(),
        }
        return json.dumps(snapshot, indent=2)
log = logging.getLogger("knowledge_cleanup")

KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
ENTITIES_PATH = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_PATH = KNOWLEDGE_DIR / "relationships.json"
OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "qwen2.5:7b"

VALID_TYPES = {"person", "organization", "company", "project", "technology",
               "location", "event", "concept", "product"}


def backup(path: Path) -> Path:
    """Copy *path* to a timestamped sibling and return the backup path."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    dest = path.with_suffix(f".backup_{stamp}.json")
    shutil.copy2(path, dest)
    log.info(f"Backed up {path.name} → {dest.name}")
    return dest


def atomic_write(path: Path, data):
    """Write *data* as JSON to a temp file, then rename it into place."""
    staging = path.with_suffix(".tmp")
    with open(staging, "w") as handle:
        json.dump(data, handle, ensure_ascii=False, indent=2)
    # rename is atomic on POSIX, so readers never see a half-written file
    staging.replace(path)
    log.info(f"Wrote {path.name}")


def load_entities() -> dict:
    """Read the entities knowledge file."""
    with open(ENTITIES_PATH) as handle:
        return json.load(handle)


def load_relationships() -> dict:
    """Read the relationships knowledge file."""
    with open(RELATIONSHIPS_PATH) as handle:
        return json.load(handle)


def ollama_generate(prompt: str, timeout: int = 120) -> str:
    """Call the local Ollama generate API and return the raw response text."""
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 2000},
    }
    reply = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=timeout)
    reply.raise_for_status()
    return reply.json().get("response", "")
def classify_unknowns(entities: dict, dry_run: bool = False) -> dict:
    """Classify entities with type='unknown' using LLM."""
    pending = {
        key: value for key, value in entities.items()
        if isinstance(value, dict) and value.get("type") == "unknown"
    }
    if not pending:
        log.info("No unknown entities to classify.")
        return entities

    log.info(f"Classifying {len(pending)} unknown entities...")

    all_names = list(pending.keys())
    batch_size = 50
    resolved = {}
    total_batches = (len(all_names) + batch_size - 1) // batch_size

    for offset in range(0, len(all_names), batch_size):
        chunk = all_names[offset:offset + batch_size]
        chunk_num = offset // batch_size + 1
        log.info(f"Batch {chunk_num}/{total_batches} ({len(chunk)} entities)")

        numbered = "\n".join(f"{j+1}. {name}" for j, name in enumerate(chunk))
        prompt = f"""Classify each entity name into exactly one category.
Categories: person, organization, company, project, technology, location, event, concept, product

If a name looks like a person's first name only (e.g. "sarah", "thomas"), classify as person.
If it's a common word that isn't clearly an entity (e.g. "ahnung", "wir", "evtl", "schau"), classify as concept.
If unsure, classify as concept.

Respond with ONLY a JSON object mapping the number to the category. Example:
{{"1": "person", "2": "company", "3": "concept"}}

Entities:
{numbered}

JSON:"""

        try:
            reply = ollama_generate(prompt)
            # The model may wrap the JSON in prose — take the outermost braces.
            open_pos = reply.find("{")
            close_pos = reply.rfind("}") + 1
            if open_pos >= 0 and close_pos > open_pos:
                mapping = json.loads(reply[open_pos:close_pos])
                for num_str, category in mapping.items():
                    pos = int(num_str) - 1
                    if 0 <= pos < len(chunk):
                        label = category.strip().lower()
                        # Ignore hallucinated categories outside the allowed set.
                        if label in VALID_TYPES:
                            resolved[chunk[pos]] = label
        except Exception as exc:
            log.warning(f"Batch {chunk_num} failed: {exc}")
            continue

        time.sleep(0.5)  # Be nice to Ollama

    # Apply the classifications (mutations skipped under dry-run).
    transitions = defaultdict(int)
    for name, fresh_type in resolved.items():
        previous = entities[name].get("type", "unknown")
        if previous != fresh_type:
            transitions[f"{previous} → {fresh_type}"] += 1
        if not dry_run:
            entities[name]["type"] = fresh_type
            entities[name]["classified_by"] = "llm_cleanup"
            entities[name]["classified_at"] = datetime.now().isoformat()

    log.info(f"Classified {len(resolved)}/{len(pending)} unknowns:")
    for transition, count in sorted(transitions.items(), key=lambda item: -item[1]):
        log.info(f" {transition}: {count}")

    leftover = sum(
        1 for value in entities.values()
        if isinstance(value, dict) and value.get("type") == "unknown"
    )
    log.info(f"Remaining unknowns: {leftover}")

    return entities
"mondo gate" vs "mondo gate ag") + names_lower = {name: name.strip().lower() for name in entities} + sorted_names = sorted(names_lower.items(), key=lambda x: len(x[1])) + + # Find names where one is a prefix/substring of another + substring_pairs = [] + for i, (name_a, low_a) in enumerate(sorted_names): + if len(low_a) < 3: + continue + for name_b, low_b in sorted_names[i+1:]: + if low_a == low_b: + continue + if low_b.startswith(low_a + " ") or low_b.startswith(low_a + "-"): + substring_pairs.append((name_a, name_b)) + + # Build merge groups + merge_groups = [] + + # Exact case duplicates + for normalized, names in groups.items(): + if len(names) > 1: + merge_groups.append(names) + + # Substring matches (merge into existing groups or create new) + for short, long in substring_pairs: + found = False + for group in merge_groups: + if short in group or long in group: + if short not in group: + group.append(short) + if long not in group: + group.append(long) + found = True + break + if not found: + merge_groups.append([short, long]) + + return merge_groups + + +def pick_canonical(names: list, entities: dict) -> str: + """Pick the most detailed entity name as canonical.""" + # Prefer: longest name, most fields, not all-lowercase + def score(name): + e = entities.get(name, {}) + fields = len(e) if isinstance(e, dict) else 0 + length = len(name) + has_upper = int(any(c.isupper() for c in name)) + return (has_upper, fields, length) + + return max(names, key=score) + + +def deduplicate(entities: dict, relationships: dict, dry_run: bool = False) -> tuple: + """Deduplicate entities and update relationships.""" + groups = find_duplicates(entities) + + if not groups: + log.info("No duplicates found.") + return entities, relationships + + log.info(f"Found {len(groups)} duplicate groups:") + + alias_map = {} # old_name → canonical_name + + for group in groups: + canonical = pick_canonical(group, entities) + aliases = [n for n in group if n != canonical] + + if not aliases: + 
continue + + log.info(f" Canonical: '{canonical}' ← aliases: {aliases}") + + for alias in aliases: + alias_map[alias] = canonical + + if not dry_run: + # Merge fields into canonical + canonical_entry = entities.get(canonical, {}) + if not isinstance(canonical_entry, dict): + canonical_entry = {} + + existing_aliases = canonical_entry.get("aliases", []) + for alias in aliases: + if alias not in existing_aliases: + existing_aliases.append(alias) + alias_entry = entities.get(alias, {}) + if isinstance(alias_entry, dict): + # Merge non-existing fields + for k, v in alias_entry.items(): + if k not in canonical_entry and k not in ("type", "aliases"): + canonical_entry[k] = v + + canonical_entry["aliases"] = existing_aliases + entities[canonical] = canonical_entry + + # Remove aliases from entities + for alias in aliases: + if alias in entities: + del entities[alias] + + # Update relationships + if not dry_run and alias_map: + updated_rels = {} + remapped = 0 + for key, rel in relationships.items(): + a = rel.get("a", "") + b = rel.get("b", "") + new_a = alias_map.get(a, a) + new_b = alias_map.get(b, b) + + if new_a != a or new_b != b: + remapped += 1 + rel["a"] = new_a + rel["b"] = new_b + + new_key = f"{new_a}::{new_b}" + + if new_key in updated_rels: + # Merge: sum counts, keep latest last_seen + existing = updated_rels[new_key] + existing["count"] = existing.get("count", 0) + rel.get("count", 0) + if rel.get("last_seen", "") > existing.get("last_seen", ""): + existing["last_seen"] = rel["last_seen"] + if rel.get("first_seen", "") < existing.get("first_seen", ""): + existing["first_seen"] = rel["first_seen"] + # Merge types + existing_types = set(existing.get("types", [])) + existing_types.update(rel.get("types", [])) + existing["types"] = list(existing_types) + else: + updated_rels[new_key] = rel + + log.info(f"Remapped {remapped} relationships, merged {len(relationships) - len(updated_rels)} duplicates") + relationships = updated_rels + + log.info(f"Merged 
{len(alias_map)} aliases into {len(set(alias_map.values()))} canonical entities") + + return entities, relationships + + +# ─── Task 3: Relationship Scoring ──────────────────────────────────────────── + +def score_relationships(relationships: dict, dry_run: bool = False) -> dict: + """Add strength scores and decay old relationships.""" + now = datetime.now() + decay_threshold = now - timedelta(days=30) + + removed = 0 + scored = 0 + decayed = 0 + + to_remove = [] + + for key, rel in relationships.items(): + count = rel.get("count", 1) + last_seen_str = rel.get("last_seen", "") + first_seen_str = rel.get("first_seen", "") + types = rel.get("types", []) + + # Base strength from count (log scale, capped at 1) + import math + count_score = min(1.0, math.log(count + 1) / math.log(100)) + + # Context diversity: more relationship types = stronger + diversity_score = min(1.0, len(types) * 0.3) + + # Recency score + recency_score = 1.0 + if last_seen_str: + try: + last_seen = datetime.fromisoformat(last_seen_str) + days_ago = (now - last_seen).days + if days_ago > 30: + recency_score = max(0.0, 1.0 - (days_ago - 30) / 180) + decayed += 1 + except (ValueError, TypeError): + pass + + # Combined strength + strength = round( + count_score * 0.4 + diversity_score * 0.3 + recency_score * 0.3, + 3 + ) + + if strength < 0.1: + to_remove.append(key) + removed += 1 + else: + if not dry_run: + rel["strength"] = strength + scored += 1 + + if not dry_run: + for key in to_remove: + del relationships[key] + + log.info(f"Scored {scored} relationships, decayed {decayed}, removed {removed} (strength < 0.1)") + + return relationships + + +# ─── Main ──────────────────────────────────────────────────────────────────── + +def main(): + parser = argparse.ArgumentParser(description="Knowledge graph cleanup") + parser.add_argument("--classify", action="store_true", help="Classify unknown entities") + parser.add_argument("--dedupe", action="store_true", help="Deduplicate entities") + 
def main():
    """CLI entry point: classify, dedupe, and/or score the knowledge graph."""
    parser = argparse.ArgumentParser(description="Knowledge graph cleanup")
    parser.add_argument("--classify", action="store_true", help="Classify unknown entities")
    parser.add_argument("--dedupe", action="store_true", help="Deduplicate entities")
    parser.add_argument("--score", action="store_true", help="Score relationships")
    parser.add_argument("--dry-run", action="store_true", help="Show changes without writing")
    args = parser.parse_args()

    dry = args.dry_run
    # When no step flag is given, every step runs.
    run_everything = not any((args.classify, args.dedupe, args.score))

    ents = load_entities()
    rels = load_relationships()
    log.info(f"Loaded {len(ents)} entities, {len(rels)} relationships")

    if dry:
        log.info("═══ DRY RUN — no files will be modified ═══")
    else:
        # Backup before any modifications
        backup(ENTITIES_PATH)
        backup(RELATIONSHIPS_PATH)

    if run_everything or args.classify:
        log.info("─── Step 1: Classify Unknowns ───")
        ents = classify_unknowns(ents, dry_run=dry)

    if run_everything or args.dedupe:
        log.info("─── Step 2: Deduplicate Entities ───")
        ents, rels = deduplicate(ents, rels, dry_run=dry)

    if run_everything or args.score:
        log.info("─── Step 3: Score Relationships ───")
        rels = score_relationships(rels, dry_run=dry)

    if dry:
        log.info(f"Dry run complete. Would result in: {len(ents)} entities, {len(rels)} relationships")
    else:
        atomic_write(ENTITIES_PATH, ents)
        atomic_write(RELATIONSHIPS_PATH, rels)
        log.info(f"Done. Final: {len(ents)} entities, {len(rels)} relationships")


if __name__ == "__main__":
    logging.basicConfig(
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
        level=logging.INFO,
    )
    main()
+Calls Ollama HTTP API with structured NER prompts. + +Configuration via environment variables: + DARKPLEX_OLLAMA_URL — Ollama base URL (default: http://localhost:11434) + DARKPLEX_OLLAMA_MODEL — Model name (default: mistral:7b) + DARKPLEX_OLLAMA_TIMEOUT — Timeout in seconds (default: 10) + DARKPLEX_EXTRACTOR — llm|regex|auto (default: auto) +""" + +import json +import logging +import os +import urllib.request +import urllib.error + +log = logging.getLogger("llm-extractor") + +OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434") +OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "llama3.2:1b") +OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30")) + +VALID_TYPES = {"person", "organization", "company", "project", "technology", + "location", "event", "concept", "product"} + +NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object. +Each key is the entity name (lowercase), each value has "type" and "context". + +Valid types: person, organization, company, project, technology, location, event, concept, product + +Rules: +- Skip common/generic words (the, system, message, etc.) +- Entity names should be lowercase, use hyphens for multi-word +- "context" is a 2-5 word description of the entity's role in the text +- If no entities found, return empty JSON object +- Return ONLY valid JSON, no explanation + +Text: +{text} + +JSON:""" + +BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object. +Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context". 
+ +Valid types: person, organization, company, project, technology, location, event, concept, product + +Rules: +- Skip common/generic words +- "context" is a 2-5 word description +- If no entities found, return empty JSON object +- Return ONLY valid JSON, no markdown, no explanation + +Texts: +{texts} + +JSON:""" + + +def _call_ollama(prompt: str) -> str | None: + """Call Ollama generate API. Returns response text or None on failure.""" + payload = json.dumps({ + "model": OLLAMA_MODEL, + "prompt": prompt, + "stream": False, + "options": {"temperature": 0.1, "num_predict": 1024}, + }).encode() + + req = urllib.request.Request( + f"{OLLAMA_URL}/api/generate", + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + try: + with urllib.request.urlopen(req, timeout=OLLAMA_TIMEOUT) as resp: + data = json.loads(resp.read().decode()) + return data.get("response", "") + except (urllib.error.URLError, TimeoutError, OSError) as e: + log.warning(f"Ollama call failed: {e}") + return None + except Exception as e: + log.warning(f"Ollama unexpected error: {e}") + return None + + +def _parse_json_response(text: str) -> dict: + """Extract JSON dict from LLM response, handling markdown fences etc.""" + if not text: + return {} + # Strip markdown code fences + text = text.strip() + if text.startswith("```"): + lines = text.split("\n") + lines = [l for l in lines if not l.strip().startswith("```")] + text = "\n".join(lines) + + # Find the JSON object + start = text.find("{") + if start == -1: + return {} + + # Find matching closing brace + depth = 0 + for i in range(start, len(text)): + if text[i] == "{": + depth += 1 + elif text[i] == "}": + depth -= 1 + if depth == 0: + try: + return json.loads(text[start:i + 1]) + except json.JSONDecodeError: + return {} + return {} + + +def _normalize_entities(raw: dict) -> dict: + """Normalize and validate extracted entities.""" + result = {} + for name, info in raw.items(): + if not isinstance(info, dict): + 
continue + name = name.strip().lower().replace("_", "-").replace(" ", "-") + if len(name) < 2 or len(name) > 80: + continue + + etype = info.get("type", "unknown").lower().strip() + if etype not in VALID_TYPES: + # Map common aliases + aliases = {"org": "organization", "tech": "technology", "loc": "location", + "place": "location", "tool": "technology", "framework": "technology", + "language": "technology", "app": "product", "software": "product", + "service": "product", "group": "organization", "team": "organization"} + etype = aliases.get(etype, "concept") + + context = info.get("context", "") + if isinstance(context, str): + context = context[:100] + else: + context = "" + + result[name] = {"type": etype, "context": context, "match": "llm"} + + return result + + +def extract_entities_llm(text: str) -> dict[str, dict] | None: + """ + Extract entities from text using Ollama LLM. + + Returns dict of {name: {type, context, match}} or None if LLM unavailable. + None signals caller to fall back to regex. + """ + if not text or len(text) < 10: + return {} + + # Truncate very long texts + if len(text) > 2000: + text = text[:2000] + + prompt = NER_PROMPT.format(text=text) + response = _call_ollama(prompt) + if response is None: + return None # Signal fallback + + raw = _parse_json_response(response) + return _normalize_entities(raw) + + +def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None: + """ + Extract entities from multiple texts in one LLM call. + + Returns combined dict or None if LLM unavailable. 
def is_available() -> bool:
    """Return True when the Ollama HTTP API answers on /api/tags."""
    try:
        probe = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET")
        with urllib.request.urlopen(probe, timeout=3) as reply:
            return reply.status == 200
    except Exception:
        # Any failure (connection refused, timeout, DNS) means "not available".
        return False
class LoopState:
    """Persistent state for the Darkplex Loop.

    Serialized to STATE_FILE as JSON after every transition; loading back
    restores only attributes that already exist on the instance.
    """

    def __init__(self):
        self.status = "INIT"
        self.cycle_count = 0
        self.last_cycle = None
        self.last_success = None
        self.last_failure = None
        self.last_alert = None
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}
        self.error = None
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self.quality_metrics = {}  # {unknown_rate, llm_success_rate, avg_entities_per_event}
        self.quality_history = []  # last 10: [{cycle, unknown_rate, llm_success_rate}]
        self.ollama_status = "unknown"  # healthy|degraded|down
        self._load()

    def _load(self):
        """Restore known attributes from STATE_FILE; missing/corrupt file is fine."""
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                # Only restore attributes the current schema knows about.
                if hasattr(self, k):
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            pass

    def save(self):
        """Persist the full instance dict to STATE_FILE (default=str for datetimes)."""
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle.

        BUG FIX: operate on a copy — the old code injected "unknown_rate"
        into the caller's dict in place.
        """
        entry = dict(perf)
        # Include unknown_rate in perf_history if available
        if self.quality_metrics:
            entry["unknown_rate"] = self.quality_metrics.get("unknown_rate", 0)
        self.perf = entry
        self.perf_history.append(entry)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over last 10 cycles.

        BUG FIX: average over the union of keys — the old code used only the
        first entry's keys, silently dropping metrics (e.g. "unknown_rate")
        that appear later in the history.
        """
        if not self.perf_history:
            return {}
        keys = set()
        for p in self.perf_history:
            keys.update(p.keys())
        n = len(self.perf_history)
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / n) for k in keys}

    def record_success(self, step_results: dict):
        """Mark a clean cycle: reset failure streak, bump counters, persist."""
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        """Mark a failed cycle; 3+ consecutive failures escalate to EMERGENCY."""
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """True when no alert was sent within ALERT_COOLDOWN seconds."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            # Unparseable timestamp — err on the side of alerting.
            return True

    def mark_alerted(self):
        """Record the time of the alert just sent and persist."""
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()


# ── Pipeline Steps ───────────────────────────────────────────────────────────

def _nats_cmd():
    """Build NATS CLI base command with optional server URL from env."""
    nats_bin = os.environ.get("NATS_BIN", "nats")
    nats_url = os.environ.get("NATS_URL", "")
    if nats_url:
        return [nats_bin, "-s", nats_url]
    return [nats_bin]


def check_new_events() -> int:
    """Return number of pending events in the consumer. 0 = nothing new, -1 = error."""
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode != 0:
            return -1
        info = json.loads(r.stdout)
        return info.get("num_pending", 0)
    except Exception as e:
        log.warning(f"check_new_events failed: {e}")
        return -1
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns {"events": [...], "total_scanned": int, "skipped": int} (plus
    "skip_reason" when the consumer has nothing pending). Falls back to
    _step_ingest_sequential on batch-fetch failure or timeout.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")

    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"

    # Check how many pending. NOTE(review): check_new_events returns -1 on
    # error, which falls through the == 0 guard and proceeds as if events
    # were pending — presumably intentional (try the fetch anyway); confirm.
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")

    events = []
    total_fetched = 0
    parse_errors = 0

    # Fetch in batches. NOTE(review): `remaining` is already clamped to
    # NATS_BATCH_SIZE, so the second min() below is redundant but harmless.
    remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        batch_size = min(remaining, NATS_BATCH_SIZE)
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)

        # --raw output: one JSON payload per line; blank lines are ignored.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1

    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)

    # Update sequence tracking (get current stream seq from consumer info)
    # so the sequential fallback knows where to resume on a later cycle.
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        # Best-effort bookkeeping — losing it only costs a re-scan later.
        log.warning("Could not save last processed sequence")

    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}


def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Reads messages one sequence number at a time from the last processed
    sequence (or at most NATS_BATCH_SIZE back) up to the stream head,
    keeping only conversation_message_in subjects. `state` is unused here;
    the parameter mirrors step_ingest's signature.
    """
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")

    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        # Missing/corrupt cursor file — start from the window's far edge.
        pass

    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}

    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    # Never scan more than NATS_BATCH_SIZE messages back from the head.
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)

    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            # Only inbound conversation messages are of interest.
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            # `nats stream get --json` base64-encodes the message payload.
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1

    # Persist the cursor so the next fallback run resumes past end_seq.
    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass

    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Loads the entity-manager module from disk, scores each event text for
    importance, runs LLM batch NER (when Ollama is available and enabled)
    over the top-scoring texts with a regex fallback, then records new
    entities and pairwise co-occurrence relationships and persists both
    files. Returns {"extracted", "new_entities", "new_relationships"}.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")

    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}

    # entity-manager.py has a hyphen in its filename, so it cannot be
    # imported normally — load it via importlib from LEVEL4_DIR.
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)

    # Try LLM batch extraction first; DARKPLEX_EXTRACTOR=regex disables it.
    from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")

    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)

    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    # One timestamp for the whole cycle (local time, second precision).
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")

    # Prepare (text, importance) per event for potential batch LLM processing.
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        # Payload text may be a list of parts (dicts with "text" or plain values).
        if isinstance(text, list):
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))

    # LLM batch extraction for qualifying texts (importance >= 0.4),
    # capped at 50 texts to keep cycle time reasonable.
    llm_results = {}
    if llm_ok:
        batch_texts = [t for t, s in sorted(
            [(t, s) for t, s in event_texts if t and s >= 0.4],
            key=lambda x: -x[1]  # highest importance first
        )][:50]
        if batch_texts:
            consecutive_fails = 0
            # Ten texts per Ollama call; bail to regex after 3 straight failures.
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
            if llm_results:
                log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")

    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        # Low-importance or empty texts are never mined for entities.
        if not text or score < 0.4:
            continue

        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Attribute a batch-level LLM entity to this event only when some
            # variant of its name (hyphenated, spaced, joined) appears in the text.
            text_lower = text.lower()
            for name, info in llm_results.items():
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue

        total_extracted += len(found)
        names = list(found.keys())

        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                # Also register with the in-memory known set so later events
                # in this same cycle can match it.
                known[name] = entities[name]

        # Co-occurrence relationships: each name pairs with at most the next
        # 4 names (j < i + 5) to bound the quadratic blowup on long lists.
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    # Order endpoints lexically so (a, b) and (b, a) share a key.
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1

    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)

    # Mirror cycle totals into loop state for the verify/report steps.
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)

    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
def step_bridge(state: LoopState) -> dict:
    """Step 3: Run knowledge bridge.

    Invokes knowledge-bridge.py sync as a subprocess and tallies bridged
    items by scanning its stdout for "<n> new|bridged|added" lines.
    Returns a status dict: skipped / failed / ok (+ bridged count).
    """
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")

    bridge_script = SCRIPT_DIR / "knowledge-bridge.py"
    if not bridge_script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}

    result = subprocess.run(
        [sys.executable, str(bridge_script), "sync"],
        capture_output=True, text=True, timeout=120,
    )

    if result.returncode != 0:
        log.warning(f"BRIDGE: Failed — {result.stderr[:200]}")
        return {"status": "failed", "error": result.stderr[:200]}

    # Sum every "<count> new/bridged/added" figure the bridge printed.
    bridged = 0
    for line in result.stdout.split("\n"):
        m = re.search(r"(\d+)\s+(?:new|bridged|added)", line, re.I)
        if m:
            bridged += int(m.group(1))

    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}


def _check_quality(state: LoopState, extract_result: dict) -> list:
    """Check entity quality metrics. Returns list of issues/warnings.

    Computes unknown_rate from the on-disk entities file, an LLM success
    proxy, and avg entities/event; writes them into state.quality_metrics
    and appends to state.quality_history (kept to 10 entries).
    """
    issues = []

    # Load entities and compute unknown_rate
    try:
        entities = json.loads(ENTITIES_FILE.read_text()) if ENTITIES_FILE.exists() else {}
    except (json.JSONDecodeError, OSError):
        entities = {}

    total = len(entities)
    unknown_count = sum(1 for e in entities.values() if e.get("type") == "unknown")
    unknown_rate = (unknown_count / total * 100) if total > 0 else 0.0

    # `or 1` guards the division below when no events were processed.
    events_processed = state.events_processed_last or 1
    extracted = extract_result.get("extracted", 0)
    avg_entities_per_event = extracted / events_processed if events_processed > 0 else 0.0

    # Estimate LLM success rate from extraction (if LLM was used, new_entities > 0 is a proxy)
    llm_success_rate = 100.0  # default if no LLM used
    # We track this per-cycle based on whether extraction produced results:
    # many events with zero extractions is treated as a total LLM failure.
    if events_processed > 10 and extracted == 0:
        llm_success_rate = 0.0

    state.quality_metrics = {
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
        "avg_entities_per_event": round(avg_entities_per_event, 2),
    }

    if unknown_rate > 30:
        issues.append(f"High unknown entity rate: {unknown_rate:.1f}% ({unknown_count}/{total})")

    # Track quality history and detect trends
    state.quality_history.append({
        "cycle": state.cycle_count + 1,
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
    })
    state.quality_history = state.quality_history[-10:]  # keep last 10

    # Check if unknown_rate rising 3 cycles in a row (strictly increasing)
    if len(state.quality_history) >= 3:
        last3 = [h["unknown_rate"] for h in state.quality_history[-3:]]
        if last3[0] < last3[1] < last3[2]:
            issues.append(f"Entity quality degrading — unknown_rate rising: {last3}")

    log.info(f"VERIFY/QUALITY: unknown_rate={unknown_rate:.1f}%, avg_entities/event={avg_entities_per_event:.2f}")
    return issues


def _check_ollama(state: LoopState) -> list:
    """Check Ollama health. Returns list of issues.

    Sets state.ollama_status to healthy/degraded/down: degraded means the
    API responds but the configured model is not among the loaded models.
    """
    issues = []
    model = os.environ.get("DARKPLEX_OLLAMA_MODEL", os.environ.get("OLLAMA_MODEL", ""))

    try:
        req = urllib.request.Request("http://localhost:11434/api/tags", method="GET")
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = json.loads(resp.read())
            models = [m.get("name", "") for m in data.get("models", [])]
            # Substring match so "llama3.2:1b" matches tagged variants.
            if model and not any(model in m for m in models):
                state.ollama_status = "degraded"
                issues.append(f"Ollama up but model '{model}' not loaded (available: {models[:5]})")
                log.warning(f"VERIFY/OLLAMA: degraded — model '{model}' not in {models[:5]}")
            else:
                state.ollama_status = "healthy"
                log.info(f"VERIFY/OLLAMA: healthy ({len(models)} models)")
    except Exception as e:
        state.ollama_status = "down"
        issues.append(f"Ollama down: {e}")
        log.warning(f"VERIFY/OLLAMA: down — {e}")

    return issues
Returns list of issues.""" + issues = [] + + if len(state.perf_history) < 2: + return issues + + current = state.perf + avgs = state.perf_averages() + + # Check total time vs rolling average + curr_total = current.get("total_ms", 0) + avg_total = avgs.get("total_ms", 0) + if avg_total > 0 and curr_total > 2 * avg_total: + issues.append(f"Performance regression detected: {curr_total}ms vs avg {avg_total}ms") + + # Check extraction time + extract_ms = current.get("extract_ms", 0) + if extract_ms > 120000: + issues.append(f"Extraction too slow: {extract_ms}ms (>2min)") + + if issues: + for i in issues: + log.warning(f"VERIFY/PERF: {i}") + else: + log.info(f"VERIFY/PERF: OK (total={curr_total}ms, avg={avg_total}ms)") + + return issues + + +def step_verify(state: LoopState, extract_result: dict) -> dict: + """Step 4: Verify output quality.""" + log.info("STEP 4: VERIFY — Checking output quality") + + issues = [] + + # File integrity checks + for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]: + if not f.exists(): + issues.append(f"{label} file missing") + else: + try: + data = json.loads(f.read_text()) + if not data: + issues.append(f"{label} file is empty") + except json.JSONDecodeError: + issues.append(f"{label} file is corrupt JSON") + + events_processed = state.events_processed_last + extracted = extract_result.get("extracted", 0) + if events_processed > 10 and extracted == 0: + issues.append(f"0 entities from {events_processed} events — extraction may be broken") + + # NATS check + try: + r = subprocess.run(["nats", "stream", "ls", "--json"], capture_output=True, text=True, timeout=10) + if r.returncode != 0: + issues.append("NATS unreachable") + except Exception as e: + issues.append(f"NATS check failed: {e}") + + # New monitoring checks + issues.extend(_check_quality(state, extract_result)) + issues.extend(_check_ollama(state)) + issues.extend(_check_performance(state)) + + verdict = "PASS" if not issues else "FAIL" + 
log.info(f"VERIFY: {verdict} — {len(issues)} issues") + for issue in issues: + log.warning(f" ⚠ {issue}") + + return {"verdict": verdict, "issues": issues} + + +def step_report(state: LoopState, verify_result: dict): + """Step 5: Alert if degraded/emergency.""" + if state.status == "RUNNING": + return + + if not state.can_alert(): + log.info("REPORT: Alert cooldown active, skipping") + return + + severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED" + msg = ( + f"Darkplex Loop {severity}\n" + f"Consecutive failures: {state.consecutive_failures}\n" + f"Error: {state.error}\n" + f"Issues: {', '.join(verify_result.get('issues', []))}" + ) + + log.warning(f"REPORT: Sending alert — {state.status}") + + try: + subprocess.run( + ["python3", str(SCRIPT_DIR / "vera-alert.py"), msg], + capture_output=True, text=True, timeout=15, + ) + except Exception: + pass + + flag = LOG_DIR / "darkplex-loop-alert.flag" + flag.write_text(f"{datetime.now().isoformat()} {state.status}: {state.error}") + state.mark_alerted() + + +# ── Helpers ────────────────────────────────────────────────────────────────── + +def _importance(text: str) -> float: + """Importance scoring for event text.""" + if not text: + return 0.0 + score = 0.3 + if len(text) > 200: score += 0.1 + if len(text) > 500: score += 0.1 + caps = len(re.findall(r"\b[A-Z][a-z]+\b", text)) + if caps > 3: score += 0.1 + if caps > 8: score += 0.1 + for p in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]: + if p.lower() in text.lower(): + score -= 0.3 + for w in ["meeting", "project", "company", "contract", "decision", "strategy", + "budget", "deadline", "milestone", "partnership", "investment", "revenue", + "client", "proposal", "agreement"]: + if w in text.lower(): + score += 0.05 + return max(0.0, min(1.0, score)) + + +def print_status(): + """Print current loop state.""" + state = LoopState() + + ent_count = rel_count = 0 + try: + ent_count = len(json.loads(ENTITIES_FILE.read_text())) + 
except Exception: + pass + try: + rel_count = len(json.loads(RELATIONSHIPS_FILE.read_text())) + except Exception: + pass + + icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "⚪") + print(f"{icon} Status: {state.status}") + print(f"Cycles: {state.cycle_count}") + print(f"Last cycle: {state.last_cycle or 'never'}") + print(f"Last success: {state.last_success or 'never'}") + print(f"Last failure: {state.last_failure or 'never'}") + print(f"Failures: {state.consecutive_failures}") + print(f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)") + print(f"Relationships:{rel_count} total") + if state.quality_metrics: + qm = state.quality_metrics + print(f"Quality: unknown_rate={qm.get('unknown_rate', '?')}% llm_success={qm.get('llm_success_rate', '?')}% avg_ent/event={qm.get('avg_entities_per_event', '?')}") + print(f"Ollama: {state.ollama_status}") + if state.perf: + print(f"Last perf: {state.perf}") + if state.error: + print(f"Error: {state.error}") + + +# ── Main Loop ──────────────────────────────────────────────────────────────── + +def _ms_since(t0: float) -> int: + return int((time.monotonic() - t0) * 1000) + + +def run_cycle(state: LoopState) -> bool: + """Run one complete pipeline cycle. 
Returns True on success.""" + log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══") + step_results = {} + perf = {} + t_cycle = time.monotonic() + + try: + t0 = time.monotonic() + ingest = step_ingest(state) + perf["ingest_ms"] = _ms_since(t0) + step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]} + + # Early skip if no new events + if ingest.get("skip_reason") == "no_new_events": + perf["total_ms"] = _ms_since(t_cycle) + state.record_perf(perf) + state.save() + log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══") + return True + + t0 = time.monotonic() + extract = step_extract(state, ingest["events"]) + perf["extract_ms"] = _ms_since(t0) + step_results["extract"] = extract + + t0 = time.monotonic() + bridge = step_bridge(state) + perf["bridge_ms"] = _ms_since(t0) + step_results["bridge"] = bridge + + t0 = time.monotonic() + verify = step_verify(state, extract) + perf["verify_ms"] = _ms_since(t0) + step_results["verify"] = verify + + perf["total_ms"] = _ms_since(t_cycle) + state.record_perf(perf) + + if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]): + state.record_failure("verify", "; ".join(verify["issues"])) + step_report(state, verify) + return False + + state.record_success(step_results) + avgs = state.perf_averages() + log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status} — {perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══") + log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms") + + flag = LOG_DIR / "darkplex-loop-alert.flag" + if flag.exists(): + flag.unlink() + + return True + + except Exception as e: + perf["total_ms"] = _ms_since(t_cycle) + state.record_perf(perf) + step_name = "unknown" + for name in ["ingest", "extract", "bridge", "verify"]: + if name not in step_results: + step_name = name + break + 
log.error(f"CYCLE FAILED at {step_name}: {e}") + log.error(traceback.format_exc()) + state.record_failure(step_name, str(e)[:300]) + step_report(state, {"issues": [str(e)]}) + return False + + +def main(): + """CLI entry point for `darkplex loop`.""" + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + handlers=[ + logging.FileHandler(LOG_DIR / "darkplex-loop.log"), + logging.StreamHandler(), + ], + ) + LOG_DIR.mkdir(parents=True, exist_ok=True) + + args = sys.argv[1:] + + if "--status" in args: + print_status() + return + + if "--check" in args: + pending = check_new_events() + if pending > 0: + print(f"NEW: {pending} events pending") + sys.exit(0) + elif pending == 0: + print("NONE: No new events") + sys.exit(1) + else: + print("ERROR: Could not check") + sys.exit(2) + + once = "--once" in args + cycle_seconds = DEFAULT_CYCLE_SECONDS + + for i, arg in enumerate(args): + if arg == "--cycle" and i + 1 < len(args): + cycle_seconds = int(args[i + 1]) + + state = LoopState() + log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}") + + running = True + def handle_signal(sig, frame): + nonlocal running + log.info("Shutdown signal received") + running = False + signal.signal(signal.SIGTERM, handle_signal) + signal.signal(signal.SIGINT, handle_signal) + + while running: + run_cycle(state) + + if once: + break + + log.info(f"Sleeping {cycle_seconds}s until next cycle...") + for _ in range(cycle_seconds): + if not running: + break + time.sleep(1) + + log.info("Darkplex Loop stopped") diff --git a/cortex/intelligence/shared_memory.py b/cortex/intelligence/shared_memory.py new file mode 100644 index 0000000..6269835 --- /dev/null +++ b/cortex/intelligence/shared_memory.py @@ -0,0 +1,152 @@ +"""Cross-Agent Memory Bus: NATS pub/sub for agent insights. + +Agents publish insights (observations, learned facts, warnings) to the bus. +Other agents subscribe to topics relevant to their function. 
+ +⚠️ DATA ISOLATION: Only Vainplex-internal agents participate. +""" + +from __future__ import annotations + +import json +import logging +import os +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Callable, Awaitable + +logger = logging.getLogger(__name__) + +NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222") + +# Only these agents are allowed to participate in shared memory +ALLOWED_AGENTS: set[str] = set( + os.environ.get("INTELLIGENCE_ALLOWED_AGENTS", "claudia,vera,stella,viola").split(",") +) + +INSIGHT_SUBJECT_PREFIX = "darkplex.intelligence.insights" + + +@dataclass +class Insight: + """An agent insight to be shared across the memory bus.""" + + agent: str + topic: str + content: str + confidence: float = 0.8 # 0.0-1.0 + tags: list[str] = field(default_factory=list) + timestamp: str = "" + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if not self.timestamp: + self.timestamp = datetime.now(timezone.utc).isoformat() + + def to_json(self) -> str: + return json.dumps({ + "agent": self.agent, + "topic": self.topic, + "content": self.content, + "confidence": self.confidence, + "tags": self.tags, + "timestamp": self.timestamp, + "metadata": self.metadata, + }) + + @classmethod + def from_json(cls, data: str) -> Insight: + d = json.loads(data) + return cls(**d) + + +InsightHandler = Callable[[Insight], Awaitable[None]] + + +class SharedMemory: + """Cross-agent memory bus using NATS pub/sub. + + Usage: + memory = SharedMemory(agent_name="claudia") + await memory.connect() + await memory.publish(Insight(agent="claudia", topic="infra", content="...")) + await memory.subscribe("infra", handler) + + ⚠️ Enforces data isolation: only allowed agents can publish/subscribe. 
+ """ + + def __init__(self, agent_name: str, nats_url: str | None = None) -> None: + if agent_name not in ALLOWED_AGENTS: + raise ValueError( + f"Agent '{agent_name}' is not allowed in shared memory. " + f"Allowed: {ALLOWED_AGENTS}" + ) + self.agent_name = agent_name + self.nats_url = nats_url or NATS_URL + self._nats_client: Any = None + self._subscriptions: list[Any] = [] + + async def connect(self) -> None: + """Connect to the NATS server.""" + try: + import nats + self._nats_client = await nats.connect(self.nats_url) + logger.info("SharedMemory connected for agent '%s'", self.agent_name) + except Exception: + logger.exception("Failed to connect SharedMemory to NATS") + raise + + async def publish(self, insight: Insight) -> None: + """Publish an insight to the memory bus. + + Args: + insight: The insight to share. Agent field must match this instance's agent. + """ + if not self._nats_client: + raise RuntimeError("Not connected. Call connect() first.") + + if insight.agent not in ALLOWED_AGENTS: + raise ValueError(f"Agent '{insight.agent}' not allowed to publish insights") + + subject = f"{INSIGHT_SUBJECT_PREFIX}.{insight.topic}" + await self._nats_client.publish(subject, insight.to_json().encode()) + logger.debug( + "Published insight: %s/%s by %s", insight.topic, insight.content[:50], insight.agent + ) + + async def subscribe(self, topic: str, handler: InsightHandler) -> None: + """Subscribe to insights on a topic. + + Args: + topic: Topic to subscribe to (supports NATS wildcards). + handler: Async callback for received insights. + """ + if not self._nats_client: + raise RuntimeError("Not connected. 
Call connect() first.") + + subject = f"{INSIGHT_SUBJECT_PREFIX}.{topic}" + + async def _message_handler(msg: Any) -> None: + try: + insight = Insight.from_json(msg.data.decode()) + if insight.agent not in ALLOWED_AGENTS: + logger.warning( + "Ignoring insight from non-allowed agent: %s", insight.agent + ) + return + await handler(insight) + except Exception: + logger.exception("Error handling insight message") + + sub = await self._nats_client.subscribe(subject, cb=_message_handler) + self._subscriptions.append(sub) + logger.info("Subscribed to insights: %s", subject) + + async def close(self) -> None: + """Unsubscribe and disconnect.""" + for sub in self._subscriptions: + await sub.unsubscribe() + self._subscriptions.clear() + if self._nats_client: + await self._nats_client.close() + self._nats_client = None diff --git a/cortex/intelligence/temporal.py b/cortex/intelligence/temporal.py new file mode 100644 index 0000000..6c8684e --- /dev/null +++ b/cortex/intelligence/temporal.py @@ -0,0 +1,193 @@ +"""Temporal Context API: chronological knowledge retrieval. + +Queries NATS events and ChromaDB with a time dimension to answer: +"What do we know about X, chronologically?" 
+""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any + +logger = logging.getLogger(__name__) + +# Default config from environment +NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222") +CHROMADB_URL = os.environ.get("CHROMADB_URL", "http://localhost:8000") + + +@dataclass +class TemporalEntry: + """A knowledge entry with temporal metadata.""" + + timestamp: datetime + source: str # "nats" or "chromadb" + topic: str + content: str + metadata: dict[str, Any] = field(default_factory=dict) + relevance_score: float = 0.0 + + +@dataclass +class TemporalQuery: + """Query parameters for temporal context retrieval.""" + + topic: str + start_time: datetime | None = None + end_time: datetime | None = None + limit: int = 50 + sources: list[str] = field(default_factory=lambda: ["nats", "chromadb"]) + + +class TemporalContext: + """Retrieves chronological knowledge from NATS events and ChromaDB. 
+ + Usage: + ctx = TemporalContext() + entries = await ctx.query(TemporalQuery(topic="ssl-cert")) + """ + + def __init__( + self, + nats_url: str | None = None, + chromadb_url: str | None = None, + ) -> None: + self.nats_url = nats_url or NATS_URL + self.chromadb_url = chromadb_url or CHROMADB_URL + self._nats_client: Any = None + self._chroma_client: Any = None + + async def connect(self) -> None: + """Initialize connections to NATS and ChromaDB.""" + try: + import nats + self._nats_client = await nats.connect(self.nats_url) + logger.info("Connected to NATS: %s", self.nats_url) + except Exception: + logger.exception("Failed to connect to NATS") + + try: + import chromadb + self._chroma_client = chromadb.HttpClient(host=self.chromadb_url) + logger.info("Connected to ChromaDB: %s", self.chromadb_url) + except Exception: + logger.exception("Failed to connect to ChromaDB") + + async def query(self, query: TemporalQuery) -> list[TemporalEntry]: + """Query temporal context across configured sources. + + Returns entries sorted chronologically (oldest first). 
+ """ + entries: list[TemporalEntry] = [] + + if "nats" in query.sources and self._nats_client: + nats_entries = await self._query_nats(query) + entries.extend(nats_entries) + + if "chromadb" in query.sources and self._chroma_client: + chroma_entries = self._query_chromadb(query) + entries.extend(chroma_entries) + + # Sort chronologically + entries.sort(key=lambda e: e.timestamp) + + # Apply limit + if query.limit: + entries = entries[:query.limit] + + return entries + + async def _query_nats(self, query: TemporalQuery) -> list[TemporalEntry]: + """Query NATS JetStream for historical events matching the topic.""" + entries: list[TemporalEntry] = [] + try: + js = self._nats_client.jetstream() + subject = f"darkplex.*.{query.topic}.>" + + # Get messages from the stream + sub = await js.subscribe(subject, ordered_consumer=True) + count = 0 + async for msg in sub.messages: + if count >= query.limit: + break + + timestamp = datetime.fromtimestamp( + msg.headers.get("Nats-Time-Stamp", 0) if msg.headers else 0, + tz=timezone.utc, + ) + + if query.start_time and timestamp < query.start_time: + continue + if query.end_time and timestamp > query.end_time: + continue + + entries.append(TemporalEntry( + timestamp=timestamp, + source="nats", + topic=query.topic, + content=msg.data.decode() if msg.data else "", + metadata={"subject": msg.subject}, + )) + count += 1 + + except Exception: + logger.exception("NATS temporal query failed for topic: %s", query.topic) + + return entries + + def _query_chromadb(self, query: TemporalQuery) -> list[TemporalEntry]: + """Query ChromaDB for semantically relevant entries with time filtering.""" + entries: list[TemporalEntry] = [] + try: + collection = self._chroma_client.get_or_create_collection("darkplex_knowledge") + + where_filter: dict[str, Any] = {} + if query.start_time: + where_filter["timestamp"] = {"$gte": query.start_time.isoformat()} + if query.end_time: + if "timestamp" in where_filter: + where_filter = { + "$and": [ + 
{"timestamp": {"$gte": query.start_time.isoformat()}}, + {"timestamp": {"$lte": query.end_time.isoformat()}}, + ] + } + else: + where_filter["timestamp"] = {"$lte": query.end_time.isoformat()} + + results = collection.query( + query_texts=[query.topic], + n_results=query.limit, + where=where_filter if where_filter else None, + ) + + if results and results.get("documents"): + for i, doc in enumerate(results["documents"][0]): + meta = results["metadatas"][0][i] if results.get("metadatas") else {} + ts_str = meta.get("timestamp", "") + try: + ts = datetime.fromisoformat(ts_str) + except (ValueError, TypeError): + ts = datetime.now(timezone.utc) + + entries.append(TemporalEntry( + timestamp=ts, + source="chromadb", + topic=query.topic, + content=doc, + metadata=meta, + relevance_score=results["distances"][0][i] if results.get("distances") else 0.0, + )) + + except Exception: + logger.exception("ChromaDB temporal query failed for topic: %s", query.topic) + + return entries + + async def close(self) -> None: + """Close connections.""" + if self._nats_client: + await self._nats_client.close() diff --git a/cortex/knowledge_extractor.py b/cortex/knowledge_extractor.py new file mode 100755 index 0000000..757a3a0 --- /dev/null +++ b/cortex/knowledge_extractor.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python3 +""" +Smart Extractor — Extract entities from NATS events and update knowledge graph. +Part of Level 4.4 AGI Roadmap. 
+ +Usage: + smart-extractor.py --last 100 — Process last N events + smart-extractor.py --since 6h — Process events from last 6 hours + smart-extractor.py --dry-run — Show what would be extracted without saving +""" + +import sys +import os +import json +import subprocess +import re +import time +import logging +from pathlib import Path +from datetime import datetime + +# Import entity-manager functions +sys.path.insert(0, str(Path(__file__).parent)) +from importlib import import_module + +SCRIPT_DIR = Path(__file__).parent +LOG_DIR = Path.home() / "clawd" / "logs" +LOG_FILE = LOG_DIR / "entity-extraction.log" +KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge" +ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json" +RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json" +NATS_STREAM = "openclaw-events" +CONSUMER_NAME = "kg-extractor-temp" + +# Setup logging +LOG_DIR.mkdir(parents=True, exist_ok=True) +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + handlers=[ + logging.FileHandler(LOG_FILE), + logging.StreamHandler(), + ], +) +log = logging.getLogger("smart-extractor") + + +def load_json(path): + try: + with open(path) as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + return {} + + +def save_json(path, data): + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + + +def importance_heuristic(text): + """Simple importance scoring (0-1) based on content heuristics.""" + if not text: + return 0.0 + + score = 0.3 # base + + # Boost for substantive content + if len(text) > 200: + score += 0.1 + if len(text) > 500: + score += 0.1 + + # Boost for entity-rich content + caps = len(re.findall(r"\b[A-Z][a-z]+\b", text)) + if caps > 3: + score += 0.1 + if caps > 8: + score += 0.1 + + # Penalize heartbeat/cron noise + noise_patterns = ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"] + for p in 
noise_patterns: + if p.lower() in text.lower(): + score -= 0.3 + + # Boost for business/project content + boost_words = ["meeting", "project", "company", "contract", "decision", + "strategy", "budget", "deadline", "milestone", "partnership", + "investment", "revenue", "client", "proposal", "agreement"] + for w in boost_words: + if w in text.lower(): + score += 0.05 + + return max(0.0, min(1.0, score)) + + +def fetch_events_nats(last=None, since=None): + """Fetch events from NATS using consumer approach.""" + events = [] + + # Create a temporary pull consumer + filter_subj = "openclaw.events.main.conversation_message_in" + + # Use direct stream get instead of consumer (more reliable) + try: + # Get stream info for sequence range + info_result = subprocess.run( + ["nats", "stream", "info", NATS_STREAM, "--json"], + capture_output=True, text=True, timeout=10 + ) + if info_result.returncode != 0: + log.error("Failed to get stream info") + return events + + info = json.loads(info_result.stdout) + end_seq = info["state"]["last_seq"] + start_seq = info["state"]["first_seq"] + + # Calculate range + count = last or 500 + if since: + # Estimate start sequence from time + ms_since = parse_since(since) * 1000 + total_ms = (time.time() * 1000) - (datetime.fromisoformat(info["state"]["first_ts"].replace("Z", "+00:00")).timestamp() * 1000) + total_msgs = end_seq - start_seq + msgs_per_ms = total_msgs / total_ms if total_ms > 0 else 1 + fetch_start = max(start_seq, int(end_seq - ms_since * msgs_per_ms * 1.2)) + else: + fetch_start = max(start_seq, end_seq - count) + + # Only fetch conversation messages + log.info(f"Fetching sequences {fetch_start} - {end_seq}") + step = max(1, (end_seq - fetch_start) // count) + + for seq in range(fetch_start, end_seq + 1, step): + try: + result = subprocess.run( + ["nats", "stream", "get", NATS_STREAM, str(seq), "--json"], + capture_output=True, text=True, timeout=5 + ) + if result.returncode != 0: + continue + msg = json.loads(result.stdout) + 
subj = msg.get("subject", "") + if "conversation_message_in" not in subj: + continue + import base64 + # Input validation: max size check (1MB) + raw_data = msg.get("data", "") + if len(raw_data) > 1_048_576: + log.warning("Skipping oversized message at seq %d (%d bytes)", seq, len(raw_data)) + continue + try: + decoded = base64.b64decode(raw_data) + except Exception as e: + log.warning("Invalid base64 at seq %d: %s", seq, e) + continue + try: + data = json.loads(decoded.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + log.warning("Invalid JSON at seq %d: %s", seq, e) + continue + if not isinstance(data, dict): + log.warning("Expected dict at seq %d, got %s", seq, type(data).__name__) + continue + events.append(data) + except Exception: + continue + + log.info(f"Fetched {len(events)} conversation events") + + except subprocess.TimeoutExpired: + log.warning("NATS command timed out") + except FileNotFoundError: + log.warning("nats CLI not found — skipping NATS extraction") + + # Filter by time if --since specified + if since and events: + cutoff = parse_since(since) + if cutoff: + events = [e for e in events if e.get("timestamp", 0) / 1000 >= cutoff] + + return events + + +def parse_since(since_str): + """Parse duration string like '6h', '1d', '30m' to epoch timestamp.""" + m = re.match(r"(\d+)([hdm])", since_str) + if not m: + return None + val, unit = int(m.group(1)), m.group(2) + seconds = {"h": 3600, "d": 86400, "m": 60}[unit] + return time.time() - (val * seconds) + + +def extract_from_event(event, known_entities): + """Extract entities from a single event.""" + # Import extract_entities from entity_manager + em = sys.modules.get("entity_manager_mod") + if not em: + # Load entity-manager module + spec_path = Path(__file__).parent / "entity_manager.py" + import importlib.util + spec = importlib.util.spec_from_file_location("entity_manager_mod", spec_path) + em = importlib.util.module_from_spec(spec) + sys.modules["entity_manager_mod"] 
= em + spec.loader.exec_module(em) + + payload = event.get("payload", {}) + text = payload.get("text_preview", "") or payload.get("text", "") + if isinstance(text, list): + text = " ".join(str(t) for t in text) + if not isinstance(text, str): + text = str(text) + + if not text: + return {}, 0.0 + + score = importance_heuristic(text) + if score < 0.4: + return {}, score + + found = em.extract_entities(text, known_entities) + return found, score + + +def run_extraction(last=None, since=None, dry_run=False): + """Main extraction pipeline.""" + log.info(f"Starting extraction (last={last}, since={since}, dry_run={dry_run})") + + # Load known entities + spec_path = Path(__file__).parent / "entity_manager.py" + import importlib.util + spec = importlib.util.spec_from_file_location("entity_manager_mod", spec_path) + em = importlib.util.module_from_spec(spec) + sys.modules["entity_manager_mod"] = em + spec.loader.exec_module(em) + + known = em.load_known_entities() + log.info(f"Loaded {len(known)} known entities") + + # Fetch events + events = fetch_events_nats(last=last, since=since) + log.info(f"Fetched {len(events)} events from NATS") + + if not events: + log.info("No events to process") + return + + entities = em.load_json(ENTITIES_FILE) + relationships = em.load_json(RELATIONSHIPS_FILE) + + total_extracted = 0 + new_entities = 0 + new_relationships = 0 + ts_now = time.strftime("%Y-%m-%dT%H:%M:%S") + + for event in events: + found, score = extract_from_event(event, known) + if not found: + continue + + total_extracted += len(found) + names = list(found.keys()) + + # Add new entities + for name, info in found.items(): + if name not in entities: + entities[name] = { + "type": info["type"], + "source": "nats-extraction", + "first_seen": ts_now, + } + new_entities += 1 + known[name] = entities[name] + + # Create co-occurrence relationships between entities found in same message + if len(names) >= 2: + for i in range(len(names)): + for j in range(i + 1, min(len(names), i + 
5)): # limit pairs + a, b = min(names[i], names[j]), max(names[i], names[j]) + key = f"{a}::{b}" + if key in relationships: + relationships[key]["count"] = relationships[key].get("count", 1) + 1 + relationships[key]["last_seen"] = ts_now + else: + relationships[key] = { + "a": a, "b": b, + "types": ["co-occurrence"], + "count": 1, + "first_seen": ts_now, + "last_seen": ts_now, + } + new_relationships += 1 + + if not dry_run and total_extracted % 50 == 0 and total_extracted > 0: + # Periodic save + em.save_json(ENTITIES_FILE, entities) + em.save_json(RELATIONSHIPS_FILE, relationships) + + if not dry_run: + em.save_json(ENTITIES_FILE, entities) + em.save_json(RELATIONSHIPS_FILE, relationships) + + log.info( + f"Done: {len(events)} events processed, {total_extracted} entities extracted, " + f"{new_entities} new entities, {new_relationships} new relationships" + ) + print( + f"\nResults: {len(events)} events → {total_extracted} entities extracted, " + f"{new_entities} new, {new_relationships} new relationships" + ) + + +def main(): + last = None + since = None + dry_run = False + + args = sys.argv[1:] + i = 0 + while i < len(args): + if args[i] == "--last" and i + 1 < len(args): + last = int(args[i + 1]) + i += 2 + elif args[i] == "--since" and i + 1 < len(args): + since = args[i + 1] + i += 2 + elif args[i] == "--dry-run": + dry_run = True + i += 1 + else: + print(__doc__) + sys.exit(1) + + if last is None and since is None: + last = 100 # default + + run_extraction(last=last, since=since, dry_run=dry_run) + + +if __name__ == "__main__": + main() diff --git a/cortex/llm_extractor.py b/cortex/llm_extractor.py new file mode 100644 index 0000000..a0ee339 --- /dev/null +++ b/cortex/llm_extractor.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +""" +LLM-Powered Entity Extractor — Uses Ollama for Named Entity Recognition. + +Standalone module. No pip dependencies beyond stdlib. +Calls Ollama HTTP API with structured NER prompts. 
+ +Configuration via environment variables: + DARKPLEX_OLLAMA_URL — Ollama base URL (default: http://localhost:11434) + DARKPLEX_OLLAMA_MODEL — Model name (default: mistral:7b) + DARKPLEX_OLLAMA_TIMEOUT — Timeout in seconds (default: 10) + DARKPLEX_EXTRACTOR — llm|regex|auto (default: auto) +""" + +import json +import logging +import os +import urllib.request +import urllib.error + +log = logging.getLogger("llm-extractor") + +OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434") +OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "mistral:7b") +OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30")) + +VALID_TYPES = {"person", "organization", "company", "project", "technology", + "location", "event", "concept", "product"} + +NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object. +Each key is the entity name (lowercase), each value has "type" and "context". + +Valid types: person, organization, company, project, technology, location, event, concept, product + +Rules: +- Skip common/generic words (the, system, message, etc.) +- Entity names should be lowercase, use hyphens for multi-word +- "context" is a 2-5 word description of the entity's role in the text +- If no entities found, return empty JSON object +- Return ONLY valid JSON, no explanation + +Text: +{text} + +JSON:""" + +BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object. +Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context". + +Valid types: person, organization, company, project, technology, location, event, concept, product + +Rules: +- Skip common/generic words +- "context" is a 2-5 word description +- If no entities found, return empty JSON object +- Return ONLY valid JSON, no markdown, no explanation + +Texts: +{texts} + +JSON:""" + + +def _call_ollama(prompt: str) -> str | None: + """Call Ollama generate API. 
Returns response text or None on failure.""" + payload = json.dumps({ + "model": OLLAMA_MODEL, + "prompt": prompt, + "stream": False, + "options": {"temperature": 0.1, "num_predict": 1024}, + }).encode() + + req = urllib.request.Request( + f"{OLLAMA_URL}/api/generate", + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + try: + with urllib.request.urlopen(req, timeout=OLLAMA_TIMEOUT) as resp: + data = json.loads(resp.read().decode()) + return data.get("response", "") + except (urllib.error.URLError, TimeoutError, OSError) as e: + log.warning(f"Ollama call failed: {e}") + return None + except Exception as e: + log.warning(f"Ollama unexpected error: {e}") + return None + + +def _parse_json_response(text: str) -> dict: + """Extract JSON dict from LLM response, handling markdown fences etc.""" + if not text: + return {} + # Strip markdown code fences + text = text.strip() + if text.startswith("```"): + lines = text.split("\n") + lines = [l for l in lines if not l.strip().startswith("```")] + text = "\n".join(lines) + + # Find the JSON object + start = text.find("{") + if start == -1: + return {} + + # Find matching closing brace + depth = 0 + for i in range(start, len(text)): + if text[i] == "{": + depth += 1 + elif text[i] == "}": + depth -= 1 + if depth == 0: + try: + return json.loads(text[start:i + 1]) + except json.JSONDecodeError: + return {} + return {} + + +def _normalize_entities(raw: dict) -> dict: + """Normalize and validate extracted entities.""" + result = {} + for name, info in raw.items(): + if not isinstance(info, dict): + continue + name = name.strip().lower().replace("_", "-").replace(" ", "-") + if len(name) < 2 or len(name) > 80: + continue + + etype = info.get("type", "unknown").lower().strip() + if etype not in VALID_TYPES: + # Map common aliases + aliases = {"org": "organization", "tech": "technology", "loc": "location", + "place": "location", "tool": "technology", "framework": "technology", + "language": 
"technology", "app": "product", "software": "product", + "service": "product", "group": "organization", "team": "organization"} + etype = aliases.get(etype, "concept") + + context = info.get("context", "") + if isinstance(context, str): + context = context[:100] + else: + context = "" + + result[name] = {"type": etype, "context": context, "match": "llm"} + + return result + + +def extract_entities_llm(text: str) -> dict[str, dict] | None: + """ + Extract entities from text using Ollama LLM. + + Returns dict of {name: {type, context, match}} or None if LLM unavailable. + None signals caller to fall back to regex. + """ + if not text or len(text) < 10: + return {} + + # Truncate very long texts + if len(text) > 2000: + text = text[:2000] + + prompt = NER_PROMPT.format(text=text) + response = _call_ollama(prompt) + if response is None: + return None # Signal fallback + + raw = _parse_json_response(response) + return _normalize_entities(raw) + + +def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None: + """ + Extract entities from multiple texts in one LLM call. + + Returns combined dict or None if LLM unavailable. 
+ """ + if not texts: + return {} + + # Filter and truncate + clean = [] + for t in texts: + if t and len(t) >= 10: + clean.append(t[:500] if len(t) > 500 else t) + if not clean: + return {} + + # Limit batch size to keep prompt reasonable + if len(clean) > 10: + clean = clean[:10] + + numbered = "\n".join(f"[{i+1}] {t}" for i, t in enumerate(clean)) + prompt = BATCH_PROMPT.format(texts=numbered) + response = _call_ollama(prompt) + if response is None: + return None + + raw = _parse_json_response(response) + return _normalize_entities(raw) + + +def is_available() -> bool: + """Check if Ollama is reachable.""" + try: + req = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET") + with urllib.request.urlopen(req, timeout=3) as resp: + return resp.status == 200 + except Exception: + return False diff --git a/cortex/loop.py b/cortex/loop.py new file mode 100644 index 0000000..681b0f3 --- /dev/null +++ b/cortex/loop.py @@ -0,0 +1,701 @@ +#!/usr/bin/env python3 +""" +Darkplex Loop — The single heartbeat of the intelligence pipeline. + +One process. One loop. One state machine. +Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health. + +Each cycle: + 1. INGEST — Fetch new events from NATS (batch consumer pull) + 2. EXTRACT — Pull entities and relationships from events + 3. BRIDGE — Sync cortex outputs to knowledge engine + 4. VERIFY — Check that real output was produced + 5. 
REPORT — Update state, alert on failure + +States: + RUNNING — Everything nominal + DEGRADED — A step failed, but loop continues with recovery attempts + EMERGENCY — Critical failure, alerting + +Usage: + darkplex loop # Run loop (default: 1h cycle) + darkplex loop --once # Single cycle, then exit + darkplex loop --cycle 3600 # Custom cycle interval (seconds) + darkplex loop --status # Print current state and exit + darkplex loop --check # Check for new events, exit 0=new 1=none +""" + +import json +import logging +import os +import re +import signal +import subprocess +import sys +import time +import traceback +from collections import deque +from datetime import datetime, timezone +from pathlib import Path + +# ── Paths (configurable via env) ───────────────────────────────────────────── + +BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd")) +SCRIPT_DIR = BASE_DIR / "scripts" +LEVEL4_DIR = SCRIPT_DIR / "level4" +LOG_DIR = BASE_DIR / "logs" +STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json" +KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge")) +ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json" +RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json" + +NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events") +NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop") +NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000")) +DEFAULT_CYCLE_SECONDS = 3600 # 1 hour +ALERT_COOLDOWN = 3600 # 1 alert per hour max + +log = logging.getLogger("darkplex-loop") + + +# ── State Machine ──────────────────────────────────────────────────────────── + +class LoopState: + """Persistent state for the Darkplex Loop.""" + + def __init__(self): + self.status = "INIT" + self.cycle_count = 0 + self.last_cycle = None + self.last_success = None + self.last_failure = None + self.last_alert = None + self.consecutive_failures = 0 + self.entities_total = 0 + 
self.relationships_total = 0 + self.entities_extracted_last = 0 + self.entities_new_last = 0 + self.events_processed_last = 0 + self.steps = {} + self.error = None + self.perf = {} # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms + self.perf_history = [] # last 10 cycles [{total_ms, ingest_ms, ...}] + self._load() + + def _load(self): + try: + data = json.loads(STATE_FILE.read_text()) + for k, v in data.items(): + if hasattr(self, k): + setattr(self, k, v) + except (FileNotFoundError, json.JSONDecodeError): + pass + + def save(self): + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str)) + + def record_perf(self, perf: dict): + """Record performance metrics for this cycle.""" + self.perf = perf + self.perf_history.append(perf) + self.perf_history = self.perf_history[-10:] # keep last 10 + + def perf_averages(self) -> dict: + """Running averages over last 10 cycles.""" + if not self.perf_history: + return {} + keys = self.perf_history[0].keys() + return {k: int(sum(p.get(k, 0) for p in self.perf_history) / len(self.perf_history)) for k in keys} + + def record_success(self, step_results: dict): + self.status = "RUNNING" + self.consecutive_failures = 0 + self.last_success = datetime.now(timezone.utc).isoformat() + self.last_cycle = self.last_success + self.cycle_count += 1 + self.steps = step_results + self.error = None + self.save() + + def record_failure(self, step: str, error: str): + self.consecutive_failures += 1 + self.last_failure = datetime.now(timezone.utc).isoformat() + self.last_cycle = self.last_failure + self.cycle_count += 1 + self.error = f"{step}: {error}" + if self.consecutive_failures >= 3: + self.status = "EMERGENCY" + else: + self.status = "DEGRADED" + self.save() + + def can_alert(self) -> bool: + if not self.last_alert: + return True + try: + last = datetime.fromisoformat(self.last_alert) + return (datetime.now(timezone.utc) - last).total_seconds() > 
ALERT_COOLDOWN + except (ValueError, TypeError): + return True + + def mark_alerted(self): + self.last_alert = datetime.now(timezone.utc).isoformat() + self.save() + + +# ── Pipeline Steps ─────────────────────────────────────────────────────────── + +def _nats_cmd(): + """Build NATS CLI base command with auth.""" + nats_bin = os.environ.get("NATS_BIN", "nats") + nats_url = os.environ.get("NATS_URL", "") + if nats_url: + return [nats_bin, "-s", nats_url] + return [nats_bin] + + +def check_new_events() -> int: + """Return number of pending events in the consumer. 0 = nothing new.""" + try: + r = subprocess.run( + _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"], + capture_output=True, text=True, timeout=10, + ) + if r.returncode != 0: + return -1 + info = json.loads(r.stdout) + return info.get("num_pending", 0) + except Exception as e: + log.warning(f"check_new_events failed: {e}") + return -1 + + +def step_ingest(state: LoopState) -> dict: + """Step 1: Fetch new events from NATS using batch consumer pull.""" + log.info("STEP 1: INGEST — Fetching events from NATS") + + last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json" + + # Check how many pending + pending = check_new_events() + if pending == 0: + log.info("INGEST: No new events — skipping cycle") + return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"} + log.info(f"INGEST: {pending} pending events in consumer") + + events = [] + total_fetched = 0 + parse_errors = 0 + + # Fetch in batches + remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE + try: + batch_size = min(remaining, NATS_BATCH_SIZE) + result = subprocess.run( + _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER, + "--count", str(batch_size), "--raw"], + capture_output=True, text=True, timeout=30, + ) + if result.returncode != 0: + log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential") + return 
_step_ingest_sequential(state) + + for line in result.stdout.strip().split("\n"): + if not line.strip(): + continue + try: + data = json.loads(line) + events.append(data) + total_fetched += 1 + except json.JSONDecodeError: + parse_errors += 1 + + except subprocess.TimeoutExpired: + log.warning("Batch fetch timed out, falling back to sequential") + return _step_ingest_sequential(state) + + # Update sequence tracking (get current stream seq from consumer info) + try: + r = subprocess.run( + _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"], + capture_output=True, text=True, timeout=10, + ) + if r.returncode == 0: + info = json.loads(r.stdout) + stream_seq = info["delivered"]["stream_seq"] + last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True) + last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq})) + except Exception: + log.warning("Could not save last processed sequence") + + log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)") + return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors} + + +def _step_ingest_sequential(state: LoopState) -> dict: + """Fallback: sequential fetch via stream get (slow but reliable).""" + import base64 + log.info("INGEST FALLBACK: Sequential fetch") + + last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json" + last_processed_seq = 0 + try: + if last_processed_seq_file.exists(): + last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0) + except Exception: + pass + + r = subprocess.run( + _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"], + capture_output=True, text=True, timeout=10, + ) + if r.returncode != 0: + return {"events": [], "total_scanned": 0, "skipped": 0} + + info = json.loads(r.stdout) + end_seq = info["state"]["last_seq"] + start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE) + + events = [] + skipped = 0 + for seq in 
range(start_seq, end_seq + 1): + try: + result = subprocess.run( + _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"], + capture_output=True, text=True, timeout=5, + ) + if result.returncode != 0: + skipped += 1 + continue + msg = json.loads(result.stdout) + if "conversation_message_in" not in msg.get("subject", ""): + skipped += 1 + continue + data = json.loads(base64.b64decode(msg["data"]).decode("utf-8")) + events.append(data) + except Exception: + skipped += 1 + + try: + last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True) + last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq})) + except Exception: + pass + + log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})") + return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped} + + +def step_extract(state: LoopState, events: list) -> dict: + """Step 2: Extract entities and relationships from events.""" + log.info(f"STEP 2: EXTRACT — Processing {len(events)} events") + + if not events: + log.info("EXTRACT: No events to process") + return {"extracted": 0, "new_entities": 0, "new_relationships": 0} + + sys.path.insert(0, str(LEVEL4_DIR)) + import importlib.util + spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py") + em = importlib.util.module_from_spec(spec) + spec.loader.exec_module(em) + + # Try LLM batch extraction first + from llm_extractor import extract_entities_llm_batch, is_available as llm_available + use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto") + llm_ok = use_llm and llm_available() + if llm_ok: + log.info("EXTRACT: Using LLM extractor (Ollama)") + else: + log.info("EXTRACT: Using regex extractor (fallback)") + + known = em.load_known_entities() + entities = em.load_json(ENTITIES_FILE) + relationships = em.load_json(RELATIONSHIPS_FILE) + + total_extracted = 0 + new_entities = 0 + new_relationships = 0 + 
ts_now = time.strftime("%Y-%m-%dT%H:%M:%S") + + # Prepare texts for potential batch LLM processing + event_texts = [] + for event in events: + payload = event.get("payload", {}) + text = payload.get("text_preview", "") or payload.get("text", "") + if isinstance(text, list): + parts = [] + for t in text: + parts.append(t.get("text", "") if isinstance(t, dict) else str(t)) + text = " ".join(parts) + if not isinstance(text, str): + text = str(text) + score = _importance(text) if text else 0.0 + event_texts.append((text, score)) + + # LLM batch extraction for qualifying texts + llm_results = {} + if llm_ok: + batch_texts = [t for t, s in event_texts if t and s >= 0.4] + if batch_texts: + consecutive_fails = 0 + for i in range(0, len(batch_texts), 10): + if consecutive_fails >= 3: + log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex") + llm_ok = False + break + chunk = batch_texts[i:i+10] + batch_result = extract_entities_llm_batch(chunk) + if batch_result: + llm_results.update(batch_result) + consecutive_fails = 0 + else: + consecutive_fails += 1 + if llm_results: + log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities") + + for idx, event in enumerate(events): + text, score = event_texts[idx] + if not text or score < 0.4: + continue + + if llm_ok and llm_results: + # Use LLM results + known entity matching + found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {} + # Add LLM entities that appear in this text + text_lower = text.lower() + for name, info in llm_results.items(): + variants = [name, name.replace("-", " "), name.replace("-", "")] + if any(v in text_lower for v in variants if len(v) > 2): + found[name] = info + else: + found = em.extract_entities(text, known) + if not found: + continue + + total_extracted += len(found) + names = list(found.keys()) + + for name, info in found.items(): + if name not in entities: + entities[name] = { + "type": info["type"], + "source": "darkplex-loop", + "first_seen": 
ts_now, + } + new_entities += 1 + known[name] = entities[name] + + if len(names) >= 2: + for i in range(len(names)): + for j in range(i + 1, min(len(names), i + 5)): + a, b = min(names[i], names[j]), max(names[i], names[j]) + key = f"{a}::{b}" + if key in relationships: + relationships[key]["count"] = relationships[key].get("count", 1) + 1 + relationships[key]["last_seen"] = ts_now + else: + relationships[key] = { + "a": a, "b": b, "types": ["co-occurrence"], + "count": 1, "first_seen": ts_now, "last_seen": ts_now, + } + new_relationships += 1 + + em.save_json(ENTITIES_FILE, entities) + em.save_json(RELATIONSHIPS_FILE, relationships) + + state.entities_total = len(entities) + state.relationships_total = len(relationships) + state.entities_extracted_last = total_extracted + state.entities_new_last = new_entities + state.events_processed_last = len(events) + + log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships") + return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships} + + +def step_bridge(state: LoopState) -> dict: + """Step 3: Run knowledge bridge.""" + log.info("STEP 3: BRIDGE — Syncing cortex outputs") + + bridge_script = SCRIPT_DIR / "knowledge-bridge.py" + if not bridge_script.exists(): + log.warning("BRIDGE: knowledge-bridge.py not found, skipping") + return {"status": "skipped", "reason": "script not found"} + + result = subprocess.run( + [sys.executable, str(bridge_script), "sync"], + capture_output=True, text=True, timeout=120, + ) + + if result.returncode != 0: + log.warning(f"BRIDGE: Failed — {result.stderr[:200]}") + return {"status": "failed", "error": result.stderr[:200]} + + bridged = 0 + for line in result.stdout.split("\n"): + m = re.search(r"(\d+)\s+(?:new|bridged|added)", line, re.I) + if m: + bridged += int(m.group(1)) + + log.info(f"BRIDGE: {bridged} items bridged") + return {"status": "ok", "bridged": bridged} + + +def step_verify(state: 
LoopState, extract_result: dict) -> dict: + """Step 4: Verify output quality.""" + log.info("STEP 4: VERIFY — Checking output quality") + + issues = [] + + for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]: + if not f.exists(): + issues.append(f"{label} file missing") + else: + try: + data = json.loads(f.read_text()) + if not data: + issues.append(f"{label} file is empty") + except json.JSONDecodeError: + issues.append(f"{label} file is corrupt JSON") + + events_processed = state.events_processed_last + extracted = extract_result.get("extracted", 0) + if events_processed > 10 and extracted == 0: + issues.append(f"0 entities from {events_processed} events — extraction may be broken") + + try: + r = subprocess.run(["nats", "stream", "ls", "--json"], capture_output=True, text=True, timeout=10) + if r.returncode != 0: + issues.append("NATS unreachable") + except Exception as e: + issues.append(f"NATS check failed: {e}") + + verdict = "PASS" if not issues else "FAIL" + log.info(f"VERIFY: {verdict} — {len(issues)} issues") + for issue in issues: + log.warning(f" ⚠ {issue}") + + return {"verdict": verdict, "issues": issues} + + +def step_report(state: LoopState, verify_result: dict): + """Step 5: Alert if degraded/emergency.""" + if state.status == "RUNNING": + return + + if not state.can_alert(): + log.info("REPORT: Alert cooldown active, skipping") + return + + severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED" + msg = ( + f"Darkplex Loop {severity}\n" + f"Consecutive failures: {state.consecutive_failures}\n" + f"Error: {state.error}\n" + f"Issues: {', '.join(verify_result.get('issues', []))}" + ) + + log.warning(f"REPORT: Sending alert — {state.status}") + + try: + subprocess.run( + ["python3", str(SCRIPT_DIR / "vera-alert.py"), msg], + capture_output=True, text=True, timeout=15, + ) + except Exception: + pass + + flag = LOG_DIR / "darkplex-loop-alert.flag" + flag.write_text(f"{datetime.now().isoformat()} 
{state.status}: {state.error}") + state.mark_alerted() + + +# ── Helpers ────────────────────────────────────────────────────────────────── + +def _importance(text: str) -> float: + """Importance scoring for event text.""" + if not text: + return 0.0 + score = 0.3 + if len(text) > 200: score += 0.1 + if len(text) > 500: score += 0.1 + caps = len(re.findall(r"\b[A-Z][a-z]+\b", text)) + if caps > 3: score += 0.1 + if caps > 8: score += 0.1 + for p in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]: + if p.lower() in text.lower(): + score -= 0.3 + for w in ["meeting", "project", "company", "contract", "decision", "strategy", + "budget", "deadline", "milestone", "partnership", "investment", "revenue", + "client", "proposal", "agreement"]: + if w in text.lower(): + score += 0.05 + return max(0.0, min(1.0, score)) + + +def print_status(): + """Print current loop state.""" + state = LoopState() + + ent_count = rel_count = 0 + try: + ent_count = len(json.loads(ENTITIES_FILE.read_text())) + except Exception: + pass + try: + rel_count = len(json.loads(RELATIONSHIPS_FILE.read_text())) + except Exception: + pass + + icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "⚪") + print(f"{icon} Status: {state.status}") + print(f"Cycles: {state.cycle_count}") + print(f"Last cycle: {state.last_cycle or 'never'}") + print(f"Last success: {state.last_success or 'never'}") + print(f"Last failure: {state.last_failure or 'never'}") + print(f"Failures: {state.consecutive_failures}") + print(f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)") + print(f"Relationships:{rel_count} total") + if state.error: + print(f"Error: {state.error}") + + +# ── Main Loop ──────────────────────────────────────────────────────────────── + +def _ms_since(t0: float) -> int: + return int((time.monotonic() - t0) * 1000) + + +def run_cycle(state: LoopState) -> bool: + """Run one complete pipeline cycle. 
Returns True on success.""" + log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══") + step_results = {} + perf = {} + t_cycle = time.monotonic() + + try: + t0 = time.monotonic() + ingest = step_ingest(state) + perf["ingest_ms"] = _ms_since(t0) + step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]} + + # Early skip if no new events + if ingest.get("skip_reason") == "no_new_events": + perf["total_ms"] = _ms_since(t_cycle) + state.record_perf(perf) + state.save() + log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══") + return True + + t0 = time.monotonic() + extract = step_extract(state, ingest["events"]) + perf["extract_ms"] = _ms_since(t0) + step_results["extract"] = extract + + t0 = time.monotonic() + bridge = step_bridge(state) + perf["bridge_ms"] = _ms_since(t0) + step_results["bridge"] = bridge + + t0 = time.monotonic() + verify = step_verify(state, extract) + perf["verify_ms"] = _ms_since(t0) + step_results["verify"] = verify + + perf["total_ms"] = _ms_since(t_cycle) + state.record_perf(perf) + + if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]): + state.record_failure("verify", "; ".join(verify["issues"])) + step_report(state, verify) + return False + + state.record_success(step_results) + avgs = state.perf_averages() + log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status} — {perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══") + log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms") + + flag = LOG_DIR / "darkplex-loop-alert.flag" + if flag.exists(): + flag.unlink() + + return True + + except Exception as e: + perf["total_ms"] = _ms_since(t_cycle) + state.record_perf(perf) + step_name = "unknown" + for name in ["ingest", "extract", "bridge", "verify"]: + if name not in step_results: + step_name = name + break + 
log.error(f"CYCLE FAILED at {step_name}: {e}") + log.error(traceback.format_exc()) + state.record_failure(step_name, str(e)[:300]) + step_report(state, {"issues": [str(e)]}) + return False + + +def main(): + """CLI entry point for `darkplex loop`.""" + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + handlers=[ + logging.FileHandler(LOG_DIR / "darkplex-loop.log"), + logging.StreamHandler(), + ], + ) + LOG_DIR.mkdir(parents=True, exist_ok=True) + + args = sys.argv[1:] + + if "--status" in args: + print_status() + return + + if "--check" in args: + pending = check_new_events() + if pending > 0: + print(f"NEW: {pending} events pending") + sys.exit(0) + elif pending == 0: + print("NONE: No new events") + sys.exit(1) + else: + print("ERROR: Could not check") + sys.exit(2) + + once = "--once" in args + cycle_seconds = DEFAULT_CYCLE_SECONDS + + for i, arg in enumerate(args): + if arg == "--cycle" and i + 1 < len(args): + cycle_seconds = int(args[i + 1]) + + state = LoopState() + log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}") + + running = True + def handle_signal(sig, frame): + nonlocal running + log.info("Shutdown signal received") + running = False + signal.signal(signal.SIGTERM, handle_signal) + signal.signal(signal.SIGINT, handle_signal) + + while running: + run_cycle(state) + + if once: + break + + log.info(f"Sleeping {cycle_seconds}s until next cycle...") + for _ in range(cycle_seconds): + if not running: + break + time.sleep(1) + + log.info("Darkplex Loop stopped") diff --git a/pyproject.toml b/pyproject.toml index e534ef0..b80ea87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,9 +3,9 @@ requires = ["setuptools>=68.0", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "cortex" -version = "0.1.0" -description = "Intelligence layer for OpenClaw — triage, health, feedback, memory hygiene, roadmap, validation" +name = "darkplex-core" +version = "0.2.0" +description 
= "Darkplex Intelligence Layer — triage, health, feedback, governance, knowledge extraction, memory hygiene, roadmap, validation" readme = "README.md" requires-python = ">=3.11" license = {text = "MIT"} @@ -15,6 +15,7 @@ authors = [ [project.scripts] cortex = "cortex.cli:main" +darkplex = "cortex.cli:main" [tool.setuptools.packages.find] include = ["cortex*"] diff --git a/tests/test_anticipator.py b/tests/test_anticipator.py new file mode 100644 index 0000000..6fe16bc --- /dev/null +++ b/tests/test_anticipator.py @@ -0,0 +1,106 @@ +"""Tests for intelligence/anticipator module.""" + +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from cortex.intelligence.anticipator import ( + AlertSeverity, + Anticipator, + PatternDefinition, + Prediction, + _detect_recurring_errors, + _detect_ssl_expiry, + _detect_usage_spike, +) + + +class TestAnticipatorInit: + def test_creates_with_builtin_patterns(self): + a = Anticipator() + assert len(a.patterns) == 3 + + def test_register_custom_pattern(self): + a = Anticipator() + p = PatternDefinition(name="test", description="test", detector=lambda e: None) + a.register_pattern(p) + assert len(a.patterns) == 4 + + +class TestAnalyze: + def test_empty_events(self): + a = Anticipator() + result = a.analyze([]) + assert result == [] + + def test_no_matching_patterns(self): + a = Anticipator() + result = a.analyze([{"type": "unrelated", "data": {}}]) + assert result == [] + + def test_detector_exception_handled(self): + def bad_detector(events): + raise RuntimeError("boom") + + a = Anticipator() + a.patterns = [PatternDefinition(name="bad", description="", detector=bad_detector)] + result = a.analyze([{}]) + assert result == [] + + +class TestSSLExpiry: + def test_no_ssl_events(self): + assert _detect_ssl_expiry([{"type": "other"}]) is None + + def test_expiring_soon(self): + expiry = (datetime.now(timezone.utc) + 
timedelta(days=5)).isoformat() + events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}] + result = _detect_ssl_expiry(events) + assert result is not None + assert result.severity == AlertSeverity.WARNING + + def test_expiring_critical(self): + expiry = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat() + events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}] + result = _detect_ssl_expiry(events) + assert result.severity == AlertSeverity.CRITICAL + + def test_not_expiring(self): + expiry = (datetime.now(timezone.utc) + timedelta(days=60)).isoformat() + events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}] + assert _detect_ssl_expiry(events) is None + + +class TestRecurringErrors: + def test_no_errors(self): + assert _detect_recurring_errors([]) is None + + def test_few_errors(self): + events = [{"type": "error", "data": {"error_type": "timeout"}}] * 2 + assert _detect_recurring_errors(events) is None + + def test_recurring_detected(self): + events = [{"type": "error", "data": {"error_type": "timeout"}}] * 5 + result = _detect_recurring_errors(events) + assert result is not None + assert result.metadata["count"] == 5 + + +class TestUsageSpike: + def test_insufficient_data(self): + assert _detect_usage_spike([]) is None + + def test_normal_usage(self): + events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(15)] + assert _detect_usage_spike(events) is None + + def test_spike_detected(self): + events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(12)] + events[-1]["data"]["value"] = 100 + events[-2]["data"]["value"] = 100 + events[-3]["data"]["value"] = 100 + result = _detect_usage_spike(events) + assert result is not None diff --git a/tests/test_collective.py b/tests/test_collective.py new file mode 100644 index 0000000..abe848e --- /dev/null +++ b/tests/test_collective.py @@ -0,0 +1,112 @@ +"""Tests for 
intelligence/collective module.""" + +import asyncio +import sys +from pathlib import Path +from unittest import mock + +import pytest + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from cortex.intelligence.shared_memory import Insight, SharedMemory, ALLOWED_AGENTS +from cortex.intelligence.collective import AggregatedPattern, CollectiveLearning + + +class TestCollectiveLearningInit: + def test_init(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + assert cl._patterns == [] + assert len(cl._insights_by_topic) == 0 + + +class TestPatternDetection: + def test_no_patterns_with_single_agent(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + # Add insights from same agent + agent = list(ALLOWED_AGENTS)[0] + for i in range(5): + cl._insights_by_topic["infra"].append( + Insight(agent=agent, topic="infra", content=f"test {i}") + ) + cl._detect_patterns() + assert len(cl._patterns) == 0 + + def test_pattern_with_multiple_agents(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + agents = list(ALLOWED_AGENTS)[:2] + cl._insights_by_topic["infra"].append( + Insight(agent=agents[0], topic="infra", content="observation 1") + ) + cl._insights_by_topic["infra"].append( + Insight(agent=agents[1], topic="infra", content="observation 2") + ) + cl._detect_patterns() + assert len(cl._patterns) == 1 + assert cl._patterns[0].topic == "infra" + + +class TestGetPatterns: + def test_filter_by_topic(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + agents = list(ALLOWED_AGENTS)[:2] + for topic in ["infra", "security"]: + for agent in agents: + cl._insights_by_topic[topic].append( + Insight(agent=agent, topic=topic, content="test") + ) + cl._detect_patterns() + assert len(cl.get_patterns(topic="infra")) == 1 + + def test_filter_by_confidence(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + agents = list(ALLOWED_AGENTS)[:2] + 
cl._insights_by_topic["low"].append( + Insight(agent=agents[0], topic="low", content="x", confidence=0.1) + ) + cl._insights_by_topic["low"].append( + Insight(agent=agents[1], topic="low", content="y", confidence=0.1) + ) + cl._detect_patterns() + assert len(cl.get_patterns(min_confidence=0.5)) == 0 + + +class TestTopicSummary: + def test_empty(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + assert cl.get_topic_summary() == {} + + +class TestExportKnowledge: + def test_export_json(self): + import json + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + data = json.loads(cl.export_knowledge()) + assert "patterns" in data + assert "topics" in data + assert "allowed_agents" in data + + +class TestHandleInsight: + @pytest.mark.asyncio + async def test_rejects_non_allowed_agent(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + insight = Insight(agent="unauthorized_agent", topic="test", content="bad") + await cl._handle_insight(insight) + assert len(cl._insights_by_topic) == 0 + + @pytest.mark.asyncio + async def test_accepts_allowed_agent(self): + sm = mock.AsyncMock(spec=SharedMemory) + cl = CollectiveLearning(sm) + agent = list(ALLOWED_AGENTS)[0] + insight = Insight(agent=agent, topic="test", content="good") + await cl._handle_insight(insight) + assert len(cl._insights_by_topic["test"]) == 1 diff --git a/tests/test_entity_manager.py b/tests/test_entity_manager.py new file mode 100644 index 0000000..3351e76 --- /dev/null +++ b/tests/test_entity_manager.py @@ -0,0 +1,111 @@ +"""Tests for entity_manager module.""" + +import json +import sys +import tempfile +from pathlib import Path +from unittest import mock + +import pytest + +# Add parent to path +sys.path.insert(0, str(Path(__file__).parent.parent)) +import cortex.entity_manager as em + + +class TestNormalize: + def test_basic(self): + assert em.normalize("Hello World") == "hello world" + + def test_underscores(self): + assert 
em.normalize("my_entity") == "my-entity" + + def test_whitespace(self): + assert em.normalize(" test ") == "test" + + +class TestLoadJson: + def test_missing_file(self): + assert em.load_json(Path("/nonexistent/file.json")) == {} + + def test_valid_json(self, tmp_path): + f = tmp_path / "test.json" + f.write_text('{"key": "value"}') + assert em.load_json(f) == {"key": "value"} + + def test_invalid_json(self, tmp_path): + f = tmp_path / "bad.json" + f.write_text("not json") + assert em.load_json(f) == {} + + +class TestSaveJson: + def test_creates_dirs(self, tmp_path): + f = tmp_path / "sub" / "dir" / "test.json" + em.save_json(f, {"hello": "world"}) + assert json.loads(f.read_text()) == {"hello": "world"} + + +class TestExtractEntities: + def test_known_entity(self): + known = {"acme-corp": {"type": "company"}} + result = em.extract_entities("Working with Acme Corp today", known) + assert "acme-corp" in result + + def test_mention(self): + result = em.extract_entities("Talked to @johndoe about it", {}) + assert "johndoe" in result + assert result["johndoe"]["type"] == "person" + + def test_capitalized_multi_word(self): + result = em.extract_entities("Met with John Smith yesterday", {}) + assert "john smith" in result + + def test_acronym(self): + result = em.extract_entities("The ACME project is going well", {}) + assert "acme" in result + assert result["acme"]["type"] == "organization" + + def test_stop_words_filtered(self): + result = em.extract_entities("The system is working fine", {}) + # None of these should be extracted as entities + for word in ["the", "system", "working"]: + assert word not in result + + def test_empty_text(self): + result = em.extract_entities("", {}) + assert result == {} + + def test_short_mention_filtered(self): + """Mentions shorter than 3 chars should be filtered.""" + result = em.extract_entities("@ab said hi", {}) + assert "ab" not in result + + +class TestCmdBootstrap: + def test_bootstrap_with_empty_areas(self, tmp_path): + with 
mock.patch.object(em, "LIFE_AREAS", tmp_path): + with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"): + with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"): + em.cmd_bootstrap() + assert (tmp_path / "entities.json").exists() + + +class TestCmdRelate: + def test_create_relationship(self, tmp_path): + with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"): + with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"): + em.cmd_relate("Alice", "Bob", "colleague") + rels = json.loads((tmp_path / "rels.json").read_text()) + assert len(rels) == 1 + key = list(rels.keys())[0] + assert "colleague" in rels[key]["types"] + + def test_update_relationship(self, tmp_path): + with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"): + with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"): + em.cmd_relate("Alice", "Bob", "colleague") + em.cmd_relate("Alice", "Bob", "friend") + rels = json.loads((tmp_path / "rels.json").read_text()) + key = list(rels.keys())[0] + assert rels[key]["count"] == 2 diff --git a/tests/test_governance_enforcer.py b/tests/test_governance_enforcer.py new file mode 100644 index 0000000..6d065e9 --- /dev/null +++ b/tests/test_governance_enforcer.py @@ -0,0 +1,79 @@ +"""Tests for governance/enforcer.py — Runtime Enforcer.""" + +import sys +from pathlib import Path + +import yaml +import pytest + +sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core")) + +from governance.enforcer import Enforcer, Decision +from governance.policy import PolicyEngine +from governance.risk_scorer import RiskScorer +from governance.evidence import EvidenceCollector, ControlMapping + + +def _make_enforcer(tmp_path, rules=None): + if rules: + policy_file = tmp_path / "test.yaml" + policy_file.write_text(yaml.dump({ + "name": "test", "description": "", "version": "1", + "rules": rules, + })) + engine = PolicyEngine(policies_dir=str(tmp_path)) + scorer = RiskScorer() + 
collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null")) + return Enforcer(policy_engine=engine, risk_scorer=scorer, evidence_collector=collector) + + +class TestDecision: + def test_approved(self): + from governance.risk_scorer import RiskResult + d = Decision(verdict="approve", reason="ok", risk=RiskResult(0, "low", {}), policy_result={}) + assert d.approved + + def test_not_approved(self): + from governance.risk_scorer import RiskResult + d = Decision(verdict="deny", reason="no", risk=RiskResult(9, "critical", {}), policy_result={}) + assert not d.approved + + +class TestEnforcer: + def test_default_allow(self, tmp_path): + enforcer = _make_enforcer(tmp_path) + decision = enforcer.evaluate({"agent": "claudia", "action": "read", "hour": 12}) + assert decision.verdict == "allow" + + def test_policy_deny(self, tmp_path): + enforcer = _make_enforcer(tmp_path, rules=[ + {"name": "deny-ext", "conditions": {"target": "external"}, "effect": "deny", "priority": 10}, + ]) + decision = enforcer.evaluate({"agent": "claudia", "action": "send", "target": "external", "hour": 12}) + assert decision.verdict == "deny" + + def test_risk_override(self, tmp_path): + """High risk should override an allow policy to escalate.""" + enforcer = _make_enforcer(tmp_path, rules=[ + {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1}, + ]) + decision = enforcer.evaluate({ + "agent": "claudia", "action": "export", + "data_type": "restricted", "target": "external", "hour": 12, + }) + # Risk should be high/critical, overriding the allow + assert decision.verdict in ("deny", "escalate") + + def test_evidence_recorded(self, tmp_path): + enforcer = _make_enforcer(tmp_path) + enforcer.evaluate({"agent": "test", "action": "read", "hour": 12}) + assert len(enforcer.evidence_collector.evidence) == 1 + + def test_data_classification_alias(self, tmp_path): + enforcer = _make_enforcer(tmp_path) + decision = enforcer.evaluate({ + "agent": "test", 
"action": "read", + "data_classification": "confidential", "hour": 12, + }) + # Should use data_classification as data_type + assert decision.risk.factors["data_type"]["value"] == "confidential" diff --git a/tests/test_governance_evidence.py b/tests/test_governance_evidence.py new file mode 100644 index 0000000..7720d7e --- /dev/null +++ b/tests/test_governance_evidence.py @@ -0,0 +1,86 @@ +"""Tests for governance/evidence.py — Evidence Collector & Control Mapping.""" + +import json +import sys +from pathlib import Path + +import yaml +import pytest + +sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core")) + +from governance.evidence import EvidenceCollector, EvidenceRecord, ControlMapping + + +class TestControlMapping: + def test_missing_file(self): + m = ControlMapping(mapping_path="/nonexistent/path.yaml") + assert m.get_controls("anything") == [] + + def test_load_mapping(self, tmp_path): + mapping_file = tmp_path / "mapping.yaml" + mapping_file.write_text(yaml.dump({ + "mappings": [ + {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]}, + {"event_types": ["access_request", "data_export"], "controls": ["A.9.1"]}, + ] + })) + m = ControlMapping(mapping_path=str(mapping_file)) + assert "A.5.1" in m.get_controls("policy_evaluation") + assert "A.9.1" in m.get_controls("access_request") + assert m.get_controls("unknown_event") == [] + + +class TestEvidenceRecord: + def test_to_dict(self): + r = EvidenceRecord( + timestamp="2026-01-01T00:00:00Z", + event_type="test", + agent="claudia", + action="read", + verdict="allow", + risk_score=2, + risk_level="low", + controls=["A.5.1"], + ) + d = r.to_dict() + assert d["agent"] == "claudia" + assert d["controls"] == ["A.5.1"] + + +class TestEvidenceCollector: + def setup_method(self): + self.collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null")) + + def test_record(self): + rec = self.collector.record( + event_type="policy_evaluation", + agent="claudia", + action="send_email", 
+ verdict="allow", + risk_score=3, + risk_level="low", + ) + assert rec.agent == "claudia" + assert len(self.collector.evidence) == 1 + + def test_filter_by_agent(self): + self.collector.record(event_type="e", agent="a", action="x", verdict="allow") + self.collector.record(event_type="e", agent="b", action="x", verdict="deny") + assert len(self.collector.get_evidence(agent="a")) == 1 + + def test_filter_by_verdict(self): + self.collector.record(event_type="e", agent="a", action="x", verdict="allow") + self.collector.record(event_type="e", agent="a", action="y", verdict="deny") + assert len(self.collector.get_evidence(verdict="deny")) == 1 + + def test_export_json(self): + self.collector.record(event_type="e", agent="a", action="x", verdict="allow") + exported = self.collector.export_json() + data = json.loads(exported) + assert len(data) == 1 + assert data[0]["agent"] == "a" + + def test_empty_evidence(self): + assert self.collector.get_evidence() == [] + assert json.loads(self.collector.export_json()) == [] diff --git a/tests/test_governance_policy.py b/tests/test_governance_policy.py new file mode 100644 index 0000000..1ec04dc --- /dev/null +++ b/tests/test_governance_policy.py @@ -0,0 +1,126 @@ +"""Tests for governance/policy.py — Policy Engine. + +NOTE: This module exists only in darkplex-core. Tests written against the module API. 
+""" + +import os +import tempfile +import pytest +from pathlib import Path + +# We need yaml for creating test fixtures +import yaml + + +def _write_policy(tmpdir, filename, data): + path = Path(tmpdir) / filename + path.write_text(yaml.dump(data)) + return path + + +class TestRule: + def setup_method(self): + import sys + sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core")) + from governance.policy import Rule + self.Rule = Rule + + def test_matches_simple(self): + r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow") + assert r.matches({"agent": "claudia"}) + + def test_no_match(self): + r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow") + assert not r.matches({"agent": "other"}) + + def test_missing_key(self): + r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow") + assert not r.matches({}) + + def test_list_condition(self): + r = self.Rule(name="r1", conditions={"action": ["read", "write"]}, effect="allow") + assert r.matches({"action": "read"}) + assert not r.matches({"action": "delete"}) + + def test_multiple_conditions(self): + r = self.Rule(name="r1", conditions={"agent": "claudia", "action": "send"}, effect="deny") + assert r.matches({"agent": "claudia", "action": "send"}) + assert not r.matches({"agent": "claudia", "action": "read"}) + + +class TestPolicyEngine: + def setup_method(self): + import sys + sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core")) + from governance.policy import PolicyEngine + self.PolicyEngine = PolicyEngine + + def test_empty_dir(self, tmp_path): + engine = self.PolicyEngine(policies_dir=str(tmp_path)) + assert engine.policies == [] + + def test_nonexistent_dir(self, tmp_path): + engine = self.PolicyEngine(policies_dir=str(tmp_path / "nope")) + assert engine.policies == [] + + def test_load_policy(self, tmp_path): + _write_policy(tmp_path, "test.yaml", { + "name": "test-policy", + "description": "Test", + "version": "1.0.0", + "rules": [ + 
{"name": "deny-external", "conditions": {"target": "external"}, "effect": "deny", "priority": 10}, + ], + }) + engine = self.PolicyEngine(policies_dir=str(tmp_path)) + assert len(engine.policies) == 1 + assert engine.policies[0].name == "test-policy" + assert len(engine.policies[0].rules) == 1 + + def test_evaluate_no_match(self, tmp_path): + _write_policy(tmp_path, "test.yaml", { + "name": "p", "description": "", "version": "1", + "rules": [{"name": "r1", "conditions": {"agent": "x"}, "effect": "deny"}], + }) + engine = self.PolicyEngine(policies_dir=str(tmp_path)) + result = engine.evaluate({"agent": "y"}) + assert result["verdict"] == "allow" + + def test_evaluate_match_deny(self, tmp_path): + _write_policy(tmp_path, "test.yaml", { + "name": "p", "description": "", "version": "1", + "rules": [{"name": "r1", "conditions": {"target": "external"}, "effect": "deny", "priority": 5}], + }) + engine = self.PolicyEngine(policies_dir=str(tmp_path)) + result = engine.evaluate({"target": "external"}) + assert result["verdict"] == "deny" + + def test_priority_ordering(self, tmp_path): + _write_policy(tmp_path, "test.yaml", { + "name": "p", "description": "", "version": "1", + "rules": [ + {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1}, + {"name": "deny-ext", "conditions": {"agent": "claudia"}, "effect": "deny", "priority": 10}, + ], + }) + engine = self.PolicyEngine(policies_dir=str(tmp_path)) + result = engine.evaluate({"agent": "claudia"}) + assert result["verdict"] == "deny" # higher priority wins + + def test_reload(self, tmp_path): + engine = self.PolicyEngine(policies_dir=str(tmp_path)) + assert len(engine.policies) == 0 + _write_policy(tmp_path, "new.yaml", { + "name": "new", "description": "", "version": "1", "rules": [], + }) + engine.reload() + assert len(engine.policies) == 1 + + def test_skips_schema_yaml(self, tmp_path): + _write_policy(tmp_path, "schema.yaml", {"name": "schema"}) + _write_policy(tmp_path, 
"real.yaml", { + "name": "real", "description": "", "version": "1", "rules": [], + }) + engine = self.PolicyEngine(policies_dir=str(tmp_path)) + assert len(engine.policies) == 1 + assert engine.policies[0].name == "real" diff --git a/tests/test_governance_report.py b/tests/test_governance_report.py new file mode 100644 index 0000000..c8df768 --- /dev/null +++ b/tests/test_governance_report.py @@ -0,0 +1,57 @@ +"""Tests for governance/report_generator.py.""" + +import json +import sys +from pathlib import Path + +sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core")) + +from governance.evidence import EvidenceCollector, ControlMapping +from governance.report_generator import ReportGenerator + + +class TestReportGenerator: + def _collector_with_mapping(self, tmp_path): + import yaml + mapping_file = tmp_path / "mapping.yaml" + mapping_file.write_text(yaml.dump({ + "mappings": [ + {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]}, + ] + })) + return EvidenceCollector(control_mapping=ControlMapping(str(mapping_file))) + + def test_empty_report(self): + collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null")) + gen = ReportGenerator(collector) + report = gen.generate_compliance_report() + assert report["status"] == "no_evidence" + + def test_report_with_evidence(self, tmp_path): + collector = self._collector_with_mapping(tmp_path) + collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow", risk_score=2, risk_level="low") + collector.record(event_type="policy_evaluation", agent="claudia", action="write", verdict="deny", risk_score=8, risk_level="high") + + gen = ReportGenerator(collector) + report = gen.generate_compliance_report() + assert report["total_evidence"] == 2 + assert "A.5.1" in report["controls_covered"] + assert report["summary"]["total_deny"] == 1 + assert report["summary"]["high_risk_events"] == 1 + + def test_agent_report(self, tmp_path): + collector = 
self._collector_with_mapping(tmp_path) + collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow") + collector.record(event_type="policy_evaluation", agent="other", action="read", verdict="deny") + + gen = ReportGenerator(collector) + report = gen.generate_agent_report("claudia") + assert report["agent"] == "claudia" + assert report["total_actions"] == 1 + + def test_export_json(self): + collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null")) + gen = ReportGenerator(collector) + output = gen.export_json() + data = json.loads(output) + assert "status" in data # empty report diff --git a/tests/test_governance_risk_scorer.py b/tests/test_governance_risk_scorer.py new file mode 100644 index 0000000..e695fa2 --- /dev/null +++ b/tests/test_governance_risk_scorer.py @@ -0,0 +1,80 @@ +"""Tests for governance/risk_scorer.py.""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core")) + +from governance.risk_scorer import RiskScorer, RiskResult, _classify_level + + +class TestClassifyLevel: + def test_low(self): + assert _classify_level(0) == "low" + assert _classify_level(3) == "low" + + def test_elevated(self): + assert _classify_level(4) == "elevated" + assert _classify_level(6) == "elevated" + + def test_high(self): + assert _classify_level(7) == "high" + assert _classify_level(8) == "high" + + def test_critical(self): + assert _classify_level(9) == "critical" + assert _classify_level(10) == "critical" + + +class TestRiskResult: + def test_acceptable(self): + r = RiskResult(value=3, level="low", factors={}) + assert r.is_acceptable + + def test_not_acceptable(self): + r = RiskResult(value=7, level="high", factors={}) + assert not r.is_acceptable + + +class TestRiskScorer: + def setup_method(self): + self.scorer = RiskScorer() + + def test_default_low_risk(self): + result = self.scorer.score({"hour": 12}) + assert result.level == "low" + assert 
result.is_acceptable + + def test_public_internal(self): + result = self.scorer.score({"data_type": "public", "target": "internal", "hour": 12}) + assert result.value <= 3 + + def test_confidential_external(self): + result = self.scorer.score({"data_type": "confidential", "target": "external", "hour": 12}) + assert result.value >= 7 + + def test_restricted_critical(self): + result = self.scorer.score({"data_type": "restricted", "target": "external", "hour": 12}) + assert result.level in ("high", "critical") + + def test_off_hours_bonus(self): + day = self.scorer.score({"data_type": "internal", "hour": 12}) + night = self.scorer.score({"data_type": "internal", "hour": 2}) + assert night.value > day.value + + def test_admin_role_reduces_risk(self): + admin = self.scorer.score({"agent_role": "admin", "hour": 12}) + external = self.scorer.score({"agent_role": "external", "hour": 12}) + assert admin.value < external.value + + def test_factors_populated(self): + result = self.scorer.score({"data_type": "internal", "target": "external", "hour": 10}) + assert "data_type" in result.factors + assert "target" in result.factors + assert "agent_role" in result.factors + assert "time_of_day" in result.factors + + def test_clamped_0_10(self): + # Even with extreme values, should be 0-10 + result = self.scorer.score({"data_type": "restricted", "target": "external", "agent_role": "external", "hour": 3}) + assert 0 <= result.value <= 10 diff --git a/tests/test_knowledge_cleanup.py b/tests/test_knowledge_cleanup.py new file mode 100644 index 0000000..9f6c213 --- /dev/null +++ b/tests/test_knowledge_cleanup.py @@ -0,0 +1,136 @@ +"""Tests for intelligence/knowledge_cleanup.py — Knowledge Graph Cleanup.""" + +import json +import math +import sys +from datetime import datetime, timedelta +from pathlib import Path +from unittest.mock import patch, MagicMock + +sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence")) + +import knowledge_cleanup as kc + + +class 
TestBackup: + def test_creates_backup(self, tmp_path): + src = tmp_path / "test.json" + src.write_text('{"a": 1}') + backup_path = kc.backup(src) + assert backup_path.exists() + assert "backup_" in backup_path.name + +class TestAtomicWrite: + def test_writes_atomically(self, tmp_path): + path = tmp_path / "out.json" + kc.atomic_write(path, {"key": "value"}) + assert json.loads(path.read_text()) == {"key": "value"} + + +class TestFindDuplicates: + def test_no_duplicates(self): + entities = {"albert": {}, "mondo-gate": {}} + groups = kc.find_duplicates(entities) + assert len(groups) == 0 + + def test_case_duplicates(self): + entities = {"Albert": {}, "albert": {}, "ALBERT": {}} + groups = kc.find_duplicates(entities) + assert len(groups) >= 1 + + def test_substring_duplicates(self): + entities = {"mondo": {"type": "company"}, "mondo gate": {"type": "company"}} + groups = kc.find_duplicates(entities) + assert len(groups) >= 1 + + +class TestPickCanonical: + def test_prefers_uppercase(self): + names = ["albert", "Albert"] + entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}} + assert kc.pick_canonical(names, entities) == "Albert" + + def test_prefers_more_fields(self): + names = ["a", "A"] + entities = {"a": {"type": "person"}, "A": {"type": "person", "source": "x", "extra": "y"}} + assert kc.pick_canonical(names, entities) == "A" + + +class TestDeduplicate: + def test_merges_entities(self): + entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}} + rels = {} + e, r = kc.deduplicate(entities, rels, dry_run=False) + assert len(e) == 1 + + def test_dry_run_no_change(self): + entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}} + rels = {} + e, r = kc.deduplicate(entities, rels, dry_run=True) + assert len(e) == 2 # unchanged in dry run + + def test_updates_relationships(self): + entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}} + rels = { + 
"albert::mondo": {"a": "albert", "b": "mondo", "types": ["co-occurrence"], "count": 1, "first_seen": "2026-01-01", "last_seen": "2026-01-01"}, + } + e, r = kc.deduplicate(entities, rels, dry_run=False) + # Relationship should be remapped to canonical + assert len(r) == 1 + + +class TestScoreRelationships: + def test_scores_assigned(self): + rels = { + "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat(), "first_seen": "2026-01-01"}, + } + result = kc.score_relationships(rels, dry_run=False) + assert "strength" in result["a::b"] + assert 0 < result["a::b"]["strength"] <= 1 + + def test_removes_weak(self): + old_date = (datetime.now() - timedelta(days=300)).isoformat() + rels = { + "a::b": {"count": 1, "types": ["co-occurrence"], "last_seen": old_date, "first_seen": old_date}, + } + result = kc.score_relationships(rels, dry_run=False) + # Very old + low count should have low strength + if len(result) > 0: + assert result["a::b"]["strength"] < 0.3 + + def test_dry_run(self): + rels = { + "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat()}, + } + result = kc.score_relationships(rels, dry_run=True) + assert "strength" not in result["a::b"] + + +class TestClassifyUnknowns: + @patch("knowledge_cleanup.ollama_generate") + def test_no_unknowns(self, mock_ollama): + entities = {"albert": {"type": "person"}} + result = kc.classify_unknowns(entities, dry_run=False) + mock_ollama.assert_not_called() + assert result == entities + + @patch("knowledge_cleanup.ollama_generate") + def test_classifies_unknowns(self, mock_ollama): + mock_ollama.return_value = '{"1": "person"}' + entities = {"albert": {"type": "unknown"}} + result = kc.classify_unknowns(entities, dry_run=False) + assert result["albert"]["type"] == "person" + + @patch("knowledge_cleanup.ollama_generate") + def test_dry_run_no_change(self, mock_ollama): + mock_ollama.return_value = '{"1": "person"}' + entities = {"albert": {"type": "unknown"}} + 
result = kc.classify_unknowns(entities, dry_run=True) + assert result["albert"]["type"] == "unknown" + + @patch("knowledge_cleanup.ollama_generate") + def test_handles_llm_failure(self, mock_ollama): + mock_ollama.side_effect = Exception("timeout") + entities = {"albert": {"type": "unknown"}} + result = kc.classify_unknowns(entities, dry_run=False) + assert result["albert"]["type"] == "unknown" # unchanged diff --git a/tests/test_knowledge_extractor.py b/tests/test_knowledge_extractor.py new file mode 100644 index 0000000..40a13c4 --- /dev/null +++ b/tests/test_knowledge_extractor.py @@ -0,0 +1,61 @@ +"""Tests for knowledge_extractor.py (darkplex-core root) — Smart Extractor.""" + +import sys +from pathlib import Path +from unittest.mock import patch + +sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core")) + +from knowledge_extractor import importance_heuristic, parse_since + + +class TestImportanceHeuristic: + def test_empty(self): + assert importance_heuristic("") == 0.0 + assert importance_heuristic(None) == 0.0 + + def test_short_text(self): + score = importance_heuristic("Hello world") + assert 0 < score <= 1.0 + + def test_long_text_boosted(self): + short = importance_heuristic("Hello") + long = importance_heuristic("x " * 300) + assert long > short + + def test_heartbeat_penalized(self): + score = importance_heuristic("HEARTBEAT_OK system running fine no issues detected at all") + assert score < 0.3 + + def test_business_boosted(self): + score = importance_heuristic("Meeting about the project deadline and contract with the client partnership") + assert score > 0.4 + + def test_capitalized_names_boost(self): + text = "Albert discussed with Thomas, Sarah, Michael, Peter, Franz, and Maria about the Company" + score = importance_heuristic(text) + assert score > 0.4 + + def test_clamped(self): + # Even extreme texts should be 0-1 + score = importance_heuristic("cron: heartbeat HEARTBEAT_OK health check no critical") + assert 0 <= score <= 1.0 + + 
+class TestParseSince: + def test_hours(self): + ts = parse_since("6h") + assert ts is not None + assert ts > 0 + + def test_days(self): + ts = parse_since("1d") + assert ts is not None + + def test_minutes(self): + ts = parse_since("30m") + assert ts is not None + + def test_invalid(self): + assert parse_since("abc") is None + assert parse_since("") is None diff --git a/tests/test_llm_extractor.py b/tests/test_llm_extractor.py new file mode 100644 index 0000000..34bdd14 --- /dev/null +++ b/tests/test_llm_extractor.py @@ -0,0 +1,147 @@ +"""Tests for intelligence/llm_extractor.py — LLM-Powered Entity Extractor.""" + +import json +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock + +sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence")) + +from llm_extractor import ( + _parse_json_response, + _normalize_entities, + extract_entities_llm, + extract_entities_llm_batch, + is_available, + VALID_TYPES, +) + + +class TestParseJsonResponse: + def test_empty(self): + assert _parse_json_response("") == {} + assert _parse_json_response(None) == {} + + def test_plain_json(self): + r = _parse_json_response('{"albert": {"type": "person", "context": "CEO"}}') + assert "albert" in r + + def test_markdown_fenced(self): + r = _parse_json_response('```json\n{"albert": {"type": "person", "context": "CEO"}}\n```') + assert "albert" in r + + def test_no_json(self): + assert _parse_json_response("no json here") == {} + + def test_nested_braces(self): + r = _parse_json_response('{"a": {"type": "person", "context": "test"}}') + assert "a" in r + + +class TestNormalizeEntities: + def test_valid_entity(self): + raw = {"Albert": {"type": "person", "context": "CEO of company"}} + result = _normalize_entities(raw) + assert "albert" in result + assert result["albert"]["type"] == "person" + assert result["albert"]["match"] == "llm" + + def test_type_alias(self): + raw = {"python": {"type": "language", "context": "programming"}} + result = 
_normalize_entities(raw) + assert result["python"]["type"] == "technology" + + def test_unknown_type_becomes_concept(self): + raw = {"thing": {"type": "xyzzy", "context": "unknown"}} + result = _normalize_entities(raw) + assert result["thing"]["type"] == "concept" + + def test_filters_short_names(self): + raw = {"x": {"type": "person", "context": "test"}} + result = _normalize_entities(raw) + assert len(result) == 0 + + def test_filters_long_names(self): + raw = {"a" * 81: {"type": "person", "context": "test"}} + result = _normalize_entities(raw) + assert len(result) == 0 + + def test_non_dict_info_skipped(self): + raw = {"test": "not a dict"} + result = _normalize_entities(raw) + assert len(result) == 0 + + def test_context_truncated(self): + raw = {"test": {"type": "person", "context": "x" * 200}} + result = _normalize_entities(raw) + assert len(result["test"]["context"]) <= 100 + + def test_underscores_to_hyphens(self): + raw = {"mondo_gate": {"type": "company", "context": "test"}} + result = _normalize_entities(raw) + assert "mondo-gate" in result + + +class TestExtractEntitiesLlm: + @patch("llm_extractor._call_ollama") + def test_empty_text(self, mock_ollama): + assert extract_entities_llm("") == {} + assert extract_entities_llm("short") == {} + mock_ollama.assert_not_called() + + @patch("llm_extractor._call_ollama") + def test_ollama_unavailable(self, mock_ollama): + mock_ollama.return_value = None + result = extract_entities_llm("This is a test about Albert and Mondo Gate AG") + assert result is None # signals fallback + + @patch("llm_extractor._call_ollama") + def test_successful_extraction(self, mock_ollama): + mock_ollama.return_value = '{"albert": {"type": "person", "context": "mentioned"}}' + result = extract_entities_llm("Albert discussed the project with the team members today") + assert "albert" in result + assert result["albert"]["type"] == "person" + + @patch("llm_extractor._call_ollama") + def test_truncates_long_text(self, mock_ollama): + 
# NOTE(review): this chunk was a git diff with collapsed newlines; the code
# below is the reconstruction of the fused patch content.  The first test
# method's decorator/header fell before the start of this chunk — its class
# and method names are reconstructed guesses (marked below); all statement
# bodies are preserved verbatim.

class TestExtractEntitiesLlm:
    # NOTE(review): class/method header reconstructed — original was truncated
    # at the start of this chunk.  TODO confirm names against the full file.
    @patch("llm_extractor._call_ollama")
    def test_truncates_long_text(self, mock_ollama):
        mock_ollama.return_value = "{}"
        extract_entities_llm("x" * 3000)
        call_args = mock_ollama.call_args[0][0]
        # The text in the prompt should be truncated
        assert len(call_args) < 3000 + 500  # prompt overhead


class TestExtractEntitiesLlmBatch:
    """Batch extraction: empty/short inputs must short-circuit without an LLM call."""

    @patch("llm_extractor._call_ollama")
    def test_empty_list(self, mock_ollama):
        assert extract_entities_llm_batch([]) == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_filters_short_texts(self, mock_ollama):
        # Texts too short to carry entities are filtered before the LLM round-trip.
        mock_ollama.return_value = "{}"
        result = extract_entities_llm_batch(["hi", "yo", ""])
        assert result == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_batch_extraction(self, mock_ollama):
        mock_ollama.return_value = '{"python": {"type": "technology", "context": "language"}}'
        result = extract_entities_llm_batch(["Python is a great programming language for data science"])
        assert "python" in result


class TestIsAvailable:
    """is_available() probes the Ollama HTTP endpoint; any exception means 'down'."""

    @patch("llm_extractor.urllib.request.urlopen")
    def test_available(self, mock_urlopen):
        # urlopen is used as a context manager, so the mock needs __enter__/__exit__.
        mock_resp = MagicMock()
        mock_resp.status = 200
        mock_resp.__enter__ = MagicMock(return_value=mock_resp)
        mock_resp.__exit__ = MagicMock(return_value=False)
        mock_urlopen.return_value = mock_resp
        assert is_available() is True

    @patch("llm_extractor.urllib.request.urlopen")
    def test_unavailable(self, mock_urlopen):
        mock_urlopen.side_effect = Exception("connection refused")
        assert is_available() is False


# ---- tests/test_loop.py (new file in the original patch) ----
"""Tests for intelligence/loop.py — Darkplex Loop state machine and helpers."""

import json
import sys
import time
from datetime import datetime, timezone, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock

sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))

import loop as darkplex_loop


class TestImportance:
    """_importance(text) scores salience in [0, 1]."""

    def test_empty(self):
        assert darkplex_loop._importance("") == 0.0

    def test_heartbeat_low(self):
        # Routine heartbeat chatter should score near zero.
        assert darkplex_loop._importance("HEARTBEAT_OK all systems nominal") < 0.2

    def test_business_content_high(self):
        score = darkplex_loop._importance("Meeting about the project deadline and budget milestone")
        assert score > 0.4

    def test_clamped(self):
        # Score must stay clamped for empty, huge, and keyword-dense inputs alike.
        for text in ["", "x" * 1000, "meeting project company contract decision strategy"]:
            s = darkplex_loop._importance(text)
            assert 0.0 <= s <= 1.0


class TestLoopState:
    """LoopState persists status/counters to STATE_FILE; tests redirect it to tmp_path."""

    def test_init(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            assert state.status == "INIT"
            assert state.cycle_count == 0

    def test_save_and_load(self, tmp_path):
        sf = tmp_path / "state.json"
        with patch.object(darkplex_loop, 'STATE_FILE', sf):
            state = darkplex_loop.LoopState()
            state.status = "RUNNING"
            state.cycle_count = 5
            state.save()

            # A fresh instance must rehydrate the persisted state.
            state2 = darkplex_loop.LoopState()
            assert state2.status == "RUNNING"
            assert state2.cycle_count == 5

    def test_record_success(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_success({"test": "ok"})
            assert state.status == "RUNNING"
            assert state.consecutive_failures == 0
            assert state.cycle_count == 1

    def test_record_failure_degraded(self, tmp_path):
        # A single failure degrades; it does not yet escalate.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_failure("ingest", "timeout")
            assert state.status == "DEGRADED"
            assert state.consecutive_failures == 1

    def test_record_failure_emergency(self, tmp_path):
        # Three consecutive failures escalate to EMERGENCY.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            for i in range(3):
                state.record_failure("ingest", "timeout")
            assert state.status == "EMERGENCY"

    def test_can_alert(self, tmp_path):
        # Alerting is rate-limited: marking alerted suppresses the next alert.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            assert state.can_alert()
            state.mark_alerted()
            assert not state.can_alert()

    def test_record_perf(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_perf({"total_ms": 1000, "ingest_ms": 200})
            assert state.perf["total_ms"] == 1000
            assert len(state.perf_history) == 1

    def test_perf_averages(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_perf({"total_ms": 1000})
            state.record_perf({"total_ms": 2000})
            avgs = state.perf_averages()
            assert avgs["total_ms"] == 1500

    def test_perf_history_capped(self, tmp_path):
        # History is a ring capped at 10 entries.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            for i in range(15):
                state.record_perf({"total_ms": i * 100})
            assert len(state.perf_history) == 10


class TestCheckNewEvents:
    """check_new_events() shells out (subprocess) and parses JSON; -1 signals failure."""

    @patch("loop.subprocess.run")
    def test_returns_pending(self, mock_run):
        mock_run.return_value = MagicMock(
            returncode=0, stdout=json.dumps({"num_pending": 42})
        )
        assert darkplex_loop.check_new_events() == 42

    @patch("loop.subprocess.run")
    def test_returns_negative_on_failure(self, mock_run):
        mock_run.return_value = MagicMock(returncode=1, stdout="")
        assert darkplex_loop.check_new_events() == -1

    @patch("loop.subprocess.run")
    def test_handles_exception(self, mock_run):
        mock_run.side_effect = Exception("nats not found")
        assert darkplex_loop.check_new_events() == -1


# ---- tests/test_shared_memory.py (new file in the original patch) ----
"""Tests for intelligence/shared_memory module."""

import json
import sys
from pathlib import Path
# NOTE(review): reconstructed from a newline-collapsed git diff.  The stdlib
# imports are repeated here so this reconstructed span is self-contained even
# though the original file's import block began just above this chunk boundary.
import json
import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).parent.parent))
from cortex.intelligence.shared_memory import ALLOWED_AGENTS, Insight, SharedMemory


class TestInsight:
    """Insight dataclass: construction, JSON round-trip, defaults."""

    def test_creation(self):
        i = Insight(agent="claudia", topic="test", content="hello")
        assert i.agent == "claudia"
        assert i.timestamp  # auto-set

    def test_to_json(self):
        i = Insight(agent="claudia", topic="test", content="hello")
        data = json.loads(i.to_json())
        assert data["agent"] == "claudia"
        assert data["topic"] == "test"

    def test_from_json(self):
        # Round-trip must preserve agent and non-default confidence.
        i = Insight(agent="claudia", topic="test", content="hello", confidence=0.9)
        i2 = Insight.from_json(i.to_json())
        assert i2.agent == i.agent
        assert i2.confidence == 0.9

    def test_default_confidence(self):
        i = Insight(agent="claudia", topic="t", content="c")
        assert i.confidence == 0.8

    def test_tags(self):
        i = Insight(agent="claudia", topic="t", content="c", tags=["a", "b"])
        assert len(i.tags) == 2


class TestSharedMemory:
    """SharedMemory gates on an agent allow-list and requires connect() first."""

    def test_allowed_agent(self):
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        assert sm.agent_name == agent

    def test_disallowed_agent(self):
        with pytest.raises(ValueError, match="not allowed"):
            SharedMemory(agent_name="hacker_bot")

    def test_not_connected_publish(self):
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        with pytest.raises(RuntimeError, match="Not connected"):
            # FIX: asyncio.get_event_loop() without a running loop is
            # deprecated (3.10+); asyncio.run() is the supported equivalent.
            import asyncio
            asyncio.run(
                sm.publish(Insight(agent=agent, topic="t", content="c"))
            )

    def test_not_connected_subscribe(self):
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        with pytest.raises(RuntimeError, match="Not connected"):
            import asyncio
            asyncio.run(
                sm.subscribe("t", lambda x: None)
            )


class TestAllowedAgents:
    def test_default_agents(self):
        assert "claudia" in ALLOWED_AGENTS
        assert len(ALLOWED_AGENTS) >= 1


# ---- tests/test_temporal.py (new file in the original patch) ----
"""Tests for intelligence/temporal.py — Temporal Context API."""

import sys
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))

from temporal import TemporalEntry, TemporalQuery, TemporalContext


class TestTemporalEntry:
    """TemporalEntry: required fields plus optional metadata/relevance_score."""

    def test_creation(self):
        e = TemporalEntry(
            timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc),
            source="nats",
            topic="ssl-cert",
            content="SSL cert expiring",
        )
        assert e.source == "nats"
        assert e.relevance_score == 0.0

    def test_metadata(self):
        e = TemporalEntry(
            timestamp=datetime.now(timezone.utc),
            source="chromadb",
            topic="test",
            content="test",
            metadata={"key": "value"},
            relevance_score=0.95,
        )
        assert e.metadata["key"] == "value"
        assert e.relevance_score == 0.95


class TestTemporalQuery:
    def test_defaults(self):
        q = TemporalQuery(topic="test")
        assert q.limit == 50
        assert "nats" in q.sources
        assert "chromadb" in q.sources

    def test_custom(self):
        q = TemporalQuery(
            topic="ssl",
            start_time=datetime(2026, 1, 1, tzinfo=timezone.utc),
            limit=10,
            sources=["nats"],
        )
        assert q.limit == 10
        assert len(q.sources) == 1


class TestTemporalContext:
    """TemporalContext with no backends connected must degrade gracefully."""

    def test_init_defaults(self):
        ctx = TemporalContext()
        assert "localhost" in ctx.nats_url
        assert "localhost" in ctx.chromadb_url

    def test_init_custom(self):
        ctx = TemporalContext(nats_url="nats://custom:4222", chromadb_url="http://custom:8000")
        assert ctx.nats_url == "nats://custom:4222"

    @pytest.mark.asyncio
    async def test_query_no_connections(self):
        ctx = TemporalContext()
        # No connections established, should return empty
        result = await ctx.query(TemporalQuery(topic="test"))
        assert result == []

    @pytest.mark.asyncio
    async def test_close_no_connection(self):
        ctx = TemporalContext()
        await ctx.close()  # Should not raise