Merge darkplex-core into cortex — unified intelligence layer v0.2.0
Some checks failed
Tests / test (push) Failing after 2s

- Merged all unique darkplex-core modules into cortex:
  - intelligence/ subfolder (anticipator, collective, shared_memory, knowledge_cleanup, temporal, llm_extractor, loop)
  - governance/ subfolder (policy engine, risk scorer, evidence, enforcer, report generator)
  - entity_manager.py, knowledge_extractor.py
- Fixed bare 'from intelligence.' imports to 'from cortex.intelligence.'
- Added 'darkplex' CLI alias alongside 'cortex'
- Package renamed to darkplex-core v0.2.0
- 405 tests passing (was 234)
- 14 new test files covering all merged modules
This commit is contained in:
Claudia 2026-02-12 08:43:02 +01:00
parent fda607c204
commit fd7d75c0ed
41 changed files with 6368 additions and 3 deletions

1
.gitignore vendored
View file

@ -5,3 +5,4 @@ dist/
build/ build/
.eggs/ .eggs/
.pytest_cache/ .pytest_cache/
.coverage

371
cortex/entity_manager.py Executable file
View file

@ -0,0 +1,371 @@
#!/usr/bin/env python3
"""
Entity Manager File-based knowledge graph for entity extraction and relationship mapping.
Part of Level 4.4 AGI Roadmap.
Usage:
entity-manager.py bootstrap Bootstrap from life/areas/
entity-manager.py extract "text" Extract entities from text
entity-manager.py relate "A" "B" [type] Create/update relationship
entity-manager.py query "entity" Query relationships for entity
entity-manager.py graph Output relationship summary
"""
import sys
import os
import json
import re
import time
from pathlib import Path
# File-based knowledge graph storage locations.
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
# Root of the curated life areas (people/, companies/, projects/ subfolders).
LIFE_AREAS = Path.home() / "life" / "areas"

# Common words to skip during entity extraction.
# FIX: removed literals that appeared twice ("then", "sure", "no", "ok",
# "may", "right") — harmless in a set, but noise for maintainers.
STOP_WORDS = {
    # articles, copulas, auxiliaries, pronouns, determiners
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "shall", "can", "need", "must", "i", "you",
    "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
    "my", "your", "his", "its", "our", "their", "this", "that", "these",
    "those", "what", "which", "who", "whom", "where", "when", "why", "how",
    "all", "each", "every", "both", "few", "more", "most", "other", "some",
    "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too",
    "very", "just", "because", "as", "until", "while", "of", "at", "by",
    "for", "with", "about", "against", "between", "through", "during",
    "before", "after", "above", "below", "to", "from", "up", "down", "in",
    "out", "on", "off", "over", "under", "again", "further", "then", "once",
    "here", "there", "and", "but", "or", "if", "else", "also",
    # infrastructure / logging vocabulary that shows up in agent transcripts
    "system", "cron", "heartbeat", "ok", "error", "warning", "info",
    "message", "session", "agent", "main", "matrix", "telegram",
    # frequent verbs and adjectives
    "read", "write", "check", "run", "send", "get", "set", "let", "see",
    "know", "think", "want", "like", "make", "take", "come", "go", "say",
    "tell", "ask", "try", "use", "find", "give", "new", "good", "first",
    "last", "long", "great", "little", "right", "big", "high", "old",
    "different", "small", "large", "next", "early", "young", "important",
    "public", "bad", "sure", "yes", "maybe", "okay",
    # conversational filler
    "thanks", "thank", "please", "hello", "hi", "hey", "bye", "well",
    # relative days, weekdays, months
    "now", "today", "tomorrow", "yesterday", "monday", "tuesday",
    "wednesday", "thursday", "friday", "saturday", "sunday",
    "january", "february", "march", "april", "june", "july",
    "august", "september", "october", "november", "december",
    # adverbs and misc high-frequency words
    "still", "already", "currently", "actually", "really",
    "look", "keep", "going", "based", "done", "work", "working",
}
def normalize(name):
    """Return the canonical entity key: trimmed, lower-cased, underscores as dashes."""
    cleaned = name.strip().lower()
    return cleaned.replace("_", "-")
def load_json(path):
    """Load JSON file, return empty dict if missing/invalid."""
    try:
        with open(path) as fh:
            data = json.load(fh)
    except (FileNotFoundError, json.JSONDecodeError):
        # Missing or corrupt file degrades to an empty registry.
        return {}
    return data
def save_json(path, data):
    """Write *data* as pretty-printed JSON to *path*, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    with open(path, "w") as fh:
        fh.write(serialized)
def load_known_entities():
    """Load known entity names from life/areas/ and entities.json.

    Returns a dict mapping normalized entity name -> {"type": ..., "source": ...}.
    Filesystem entries under life/areas/ take precedence over entities.json.
    """
    known = {}
    # Explicit singular type per category folder.
    # BUG FIX: the old code derived the type with category.rstrip("s"), which
    # strips *characters*, not a suffix — "companies" became "companie".
    category_types = {"people": "person", "companies": "company", "projects": "project"}
    for category, etype in category_types.items():
        area_dir = LIFE_AREAS / category
        if not area_dir.exists():
            continue
        for entry in area_dir.iterdir():
            if entry.is_dir():
                name = normalize(entry.name)
                known[name] = {"type": etype, "source": f"life/areas/{category}"}
    # entities.json fills in anything not already present from the filesystem.
    entities = load_json(ENTITIES_FILE)
    for name, info in entities.items():
        if name not in known:
            known[name] = info
    return known
def extract_entities(text, known=None):
    """Extract entities from *text* via known-entity matching plus heuristics.

    Returns a dict mapping normalized name -> {"type": ..., "match": <strategy>}.
    Earlier strategies win: a name claimed by one pass is skipped by later ones.
    """
    if known is None:
        known = load_known_entities()
    results = {}
    lowered = text.lower()

    # 1. Known entities: try the slug, a spaced variant, and a joined variant.
    for name, info in known.items():
        for variant in (name, name.replace("-", " "), name.replace("-", "")):
            if variant in lowered and len(variant) > 2:
                results[name] = {"type": info.get("type", "unknown"), "match": "known"}
                break

    # 2. @mentions are assumed to be people.
    for match in re.finditer(r"@(\w+)", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS and len(candidate) > 2:
            results[candidate] = {"type": "person", "match": "mention"}

    # 3. Capitalized multi-word runs (likely proper nouns; umlauts supported).
    for match in re.finditer(r"\b([A-Z][a-zäöüß]+(?:\s+[A-Z][a-zäöüß]+)+)\b", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS and len(candidate) > 3:
            # Heuristic: 2-3 words reads like a person's name; more like a topic.
            etype = "person" if len(candidate.split()) <= 3 else "topic"
            results[candidate] = {"type": etype, "match": "capitalized"}

    # 4. Standalone capitalized words — potential entities of unknown type.
    for match in re.finditer(r"\b([A-Z][a-zäöüß]{2,})\b", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS:
            results[candidate] = {"type": "unknown", "match": "capitalized_single"}

    # 5. ALL-CAPS acronyms (likely companies/products), minus tech jargon.
    tech_noise = {
        "ok", "am", "pm", "gmt", "utc", "url", "api", "cli", "ssh", "dns",
        "http", "https", "json", "html", "css", "js", "ts", "py", "md",
        "id", "ui", "ux", "io", "os", "ip", "gb", "mb", "kb", "tb",
    }
    for match in re.finditer(r"\b([A-Z]{2,6})\b", text):
        candidate = normalize(match.group(1))
        if candidate not in results and candidate not in STOP_WORDS and candidate not in tech_noise:
            results[candidate] = {"type": "organization", "match": "acronym"}
    return results
def cmd_bootstrap():
    """Bootstrap entities from life/areas/.

    Scans life/areas/people and life/areas/companies, registering one entity
    per directory (names already in entities.json are left untouched). Email
    and context are pulled from each entry's summary.md when present. Both
    registry files are written back at the end.
    """
    entities = load_json(ENTITIES_FILE)
    relationships = load_json(RELATIONSHIPS_FILE)
    count = 0
    for category in ["people", "companies"]:
        area_dir = LIFE_AREAS / category
        if not area_dir.exists():
            continue
        etype = "person" if category == "people" else "company"
        for entry in sorted(area_dir.iterdir()):
            if not entry.is_dir():
                continue
            name = normalize(entry.name)
            if name in entities:
                # Never overwrite an existing registry entry.
                continue
            info = {"type": etype, "source": f"life/areas/{category}", "bootstrapped": True}
            # Try to extract extra info from summary.md (first 2000 chars only).
            summary_path = entry / "summary.md"
            if summary_path.exists():
                try:
                    summary = summary_path.read_text(errors="replace")[:2000]
                    # Email from a "**Email:** ..." markdown line.
                    em = re.search(r"\*\*Email:\*\*\s*(\S+)", summary)
                    if em:
                        info["email"] = em.group(1)
                    # Context from a "**Kontext:** ..." (German) markdown line.
                    ctx = re.search(r"\*\*Kontext:\*\*\s*(.+)", summary)
                    if ctx:
                        info["context"] = ctx.group(1).strip()
                except Exception:
                    # Enrichment is best-effort: a broken summary.md must not
                    # abort the whole bootstrap scan.
                    pass
            entities[name] = info
            count += 1
    save_json(ENTITIES_FILE, entities)
    # Relationships are written back unchanged so the file exists after bootstrap.
    save_json(RELATIONSHIPS_FILE, relationships)
    print(f"Bootstrapped {count} new entities. Total: {len(entities)}")
def cmd_extract(text):
    """Extract entities from *text*, print them, and register new discoveries."""
    known = load_known_entities()
    found = extract_entities(text, known)
    if not found:
        print("No entities found.")
        return
    # Persist any names not yet in the registry.
    registry = load_json(ENTITIES_FILE)
    added = 0
    for name, info in found.items():
        if name not in registry:
            registry[name] = {
                "type": info["type"],
                "source": "extraction",
                "first_seen": time.strftime("%Y-%m-%dT%H:%M:%S"),
            }
            added += 1
        print(f" [{info['type']:12s}] {name} ({info['match']})")
    if added:
        save_json(ENTITIES_FILE, registry)
        print(f"\n{added} new entities added to registry.")
def cmd_relate(entity_a, entity_b, rel_type="related"):
    """Create or update a relationship between two entities."""
    a, b = normalize(entity_a), normalize(entity_b)
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)
    # The key is order-independent: lexicographically smaller name first.
    key = f"{min(a,b)}::{max(a,b)}"
    ts = time.strftime("%Y-%m-%dT%H:%M:%S")
    existing = relationships.get(key)
    if existing is not None:
        existing["count"] = existing.get("count", 1) + 1
        existing["last_seen"] = ts
        # Only non-default relation types are recorded, once each.
        if rel_type != "related" and rel_type not in existing.get("types", []):
            existing.setdefault("types", []).append(rel_type)
        print(f"Updated: {a} <-> {b} (seen {existing['count']}x)")
    else:
        relationships[key] = {
            "a": a, "b": b,
            "types": [rel_type],
            "count": 1,
            "first_seen": ts,
            "last_seen": ts,
        }
        print(f"Created: {a} <-> {b} ({rel_type})")
    # Both endpoints must exist in the entity registry.
    for name in (a, b):
        if name not in entities:
            entities[name] = {"type": "unknown", "source": "relationship", "first_seen": ts}
    save_json(RELATIONSHIPS_FILE, relationships)
    save_json(ENTITIES_FILE, entities)
def cmd_query(entity_name):
    """Query all relationships for an entity.

    Prints, in order: the registry entry (if any), all relationships sorted by
    observation count, and an excerpt of any matching life/areas/ summary.
    """
    name = normalize(entity_name)
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)
    # Entity info from the registry.
    if name in entities:
        info = entities[name]
        print(f"Entity: {name}")
        print(f" Type: {info.get('type', 'unknown')}")
        if info.get("email"):
            print(f" Email: {info['email']}")
        if info.get("context"):
            print(f" Context: {info['context']}")
        if info.get("source"):
            print(f" Source: {info['source']}")
    else:
        print(f"Entity '{name}' not found in registry.")
    # Relationships touching this entity (as either endpoint).
    rels = []
    for key, rel in relationships.items():
        if rel["a"] == name or rel["b"] == name:
            other = rel["b"] if rel["a"] == name else rel["a"]
            rels.append((other, rel))
    if rels:
        print(f"\nRelationships ({len(rels)}):")
        # Most frequently co-observed first.
        for other, rel in sorted(rels, key=lambda x: -x[1].get("count", 1)):
            types = ", ".join(rel.get("types", ["related"]))
            print(f" {name} <-> {other} [{types}] (seen {rel.get('count', 1)}x)")
    else:
        print("\nNo relationships found.")
    # Check life/areas/ for a matching folder (spaces map to dashes on disk).
    for category in ["people", "companies", "projects"]:
        area_path = LIFE_AREAS / category / name.replace(" ", "-")
        if area_path.exists():
            summary_path = area_path / "summary.md"
            if summary_path.exists():
                print(f"\nLife area ({category}): {area_path}")
                # Only the first 500 chars, to keep the output readable.
                content = summary_path.read_text(errors="replace")[:500]
                print(content)
def cmd_graph():
    """Output a simple relationship graph summary."""
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)
    if not relationships:
        print("No relationships in knowledge graph.")
        return
    # Degree per entity: each relationship contributes to both endpoints.
    degree = {}
    for rel in relationships.values():
        for endpoint in (rel["a"], rel["b"]):
            degree[endpoint] = degree.get(endpoint, 0) + 1
    print(f"Knowledge Graph: {len(entities)} entities, {len(relationships)} relationships\n")
    print("Top connected entities:")
    ranked = sorted(degree.items(), key=lambda item: -item[1])
    for name, count in ranked[:20]:
        etype = entities.get(name, {}).get("type", "?")
        print(f" {name} ({etype}): {count} connections")
    print(f"\nRecent relationships:")
    by_recency = sorted(relationships.values(), key=lambda r: r.get("last_seen", ""), reverse=True)
    for rel in by_recency[:10]:
        types = ", ".join(rel.get("types", ["related"]))
        print(f" {rel['a']} <-> {rel['b']} [{types}]")
def main():
    """CLI entry point: dispatch on the first positional argument."""
    argv = sys.argv
    if len(argv) < 2:
        print(__doc__)
        sys.exit(1)
    command = argv[1]
    if command == "bootstrap":
        cmd_bootstrap()
        return
    if command == "extract":
        if len(argv) < 3:
            print("Usage: entity-manager.py extract \"text\"")
            sys.exit(1)
        cmd_extract(" ".join(argv[2:]))
        return
    if command == "relate":
        if len(argv) < 4:
            print("Usage: entity-manager.py relate \"entity_a\" \"entity_b\" [type]")
            sys.exit(1)
        rel_type = argv[4] if len(argv) > 4 else "related"
        cmd_relate(argv[2], argv[3], rel_type)
        return
    if command == "query":
        if len(argv) < 3:
            print("Usage: entity-manager.py query \"entity\"")
            sys.exit(1)
        cmd_query(" ".join(argv[2:]))
        return
    if command == "graph":
        cmd_graph()
        return
    # Fall through: unknown command.
    print(f"Unknown command: {command}")
    print(__doc__)
    sys.exit(1)


if __name__ == "__main__":
    main()

View file

228
cortex/governance/cli.py Normal file
View file

@ -0,0 +1,228 @@
"""Governance CLI — policy evaluation, risk scoring, evidence & reporting.
Usage:
darkplex governance evaluate --agent <name> --action <action> [--data-type <type>] [--target <target>] [--role <role>]
darkplex governance risk --agent <name> --action <action> [--data-type <type>] [--target <target>] [--role <role>]
darkplex governance evidence [--agent <name>] [--verdict <verdict>] [--control <id>] [--json]
darkplex governance report [--agent <name>] [--json] [--output <path>]
darkplex governance policies [--reload]
darkplex governance status
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
# Default paths — overridable via environment for testing and deployment.
_PACKAGE_DIR = Path(__file__).parent
DEFAULT_POLICIES_DIR = os.environ.get(
    "GOVERNANCE_POLICIES_DIR",
    str(_PACKAGE_DIR / "policies"),
)
DEFAULT_CONTROLS_MAPPING = os.environ.get(
    "GOVERNANCE_CONTROLS_MAPPING",
    str(_PACKAGE_DIR / "controls" / "iso27001-mapping.yaml"),
)
def _build_context(args: argparse.Namespace) -> dict:
"""Build an evaluation context from CLI args."""
ctx = {}
if args.agent:
ctx["agent"] = args.agent
if args.action:
ctx["action"] = args.action
if args.data_type:
ctx["data_type"] = args.data_type
if args.target:
ctx["target"] = args.target
if args.role:
ctx["agent_role"] = args.role
return ctx
def _get_engine():
    """Lazily construct a PolicyEngine using the default policies dir.

    Import is package-qualified: after the darkplex-core merge these modules
    live under cortex/governance/, so the bare 'from governance.' import no
    longer resolves (mirrors the 'from cortex.intelligence.' fix).
    """
    from cortex.governance.policy import PolicyEngine

    return PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR)
def _get_scorer():
    """Lazily construct a RiskScorer.

    Package-qualified import (cortex.governance) — see _get_engine.
    """
    from cortex.governance.risk_scorer import RiskScorer

    return RiskScorer()
def _get_enforcer():
    """Wire up a full Enforcer (policy engine + risk scorer + evidence).

    Package-qualified imports (cortex.governance) — see _get_engine.
    """
    from cortex.governance.enforcer import Enforcer
    from cortex.governance.evidence import ControlMapping, EvidenceCollector
    from cortex.governance.policy import PolicyEngine
    from cortex.governance.risk_scorer import RiskScorer

    return Enforcer(
        policy_engine=PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR),
        risk_scorer=RiskScorer(),
        evidence_collector=EvidenceCollector(
            control_mapping=ControlMapping(DEFAULT_CONTROLS_MAPPING)
        ),
    )
def cmd_evaluate(args: argparse.Namespace) -> None:
    """Full governance evaluation: policy + risk + evidence.

    Builds a context from CLI args, runs it through the Enforcer, and prints
    the decision as JSON (--json) or human-readable text.
    """
    enforcer = _get_enforcer()
    ctx = _build_context(args)
    decision = enforcer.evaluate(ctx)
    if args.json:
        print(json.dumps({
            "verdict": decision.verdict,
            "reason": decision.reason,
            "risk_score": decision.risk.value,
            "risk_level": decision.risk.level,
            "risk_factors": decision.risk.factors,
            "policy_result": decision.policy_result,
        }, indent=2))
    else:
        # NOTE(review): the approve/deny icons are empty strings — they look
        # like emoji lost in a re-encoding pass; confirm and restore if so.
        icon = {"approve": "", "deny": "", "escalate": "⚠️"}.get(decision.verdict, "")
        print(f"{icon} Verdict: {decision.verdict.upper()}")
        print(f" Reason: {decision.reason}")
        print(f" Risk: {decision.risk.value}/10 ({decision.risk.level})")
        # Each factor detail presumably is a dict with 'value'/'score' keys;
        # the .get fallback prints the raw detail otherwise — TODO confirm shape.
        for factor, detail in decision.risk.factors.items():
            print(f"{factor}: {detail.get('value', detail)} (+{detail.get('score', 0)})")
def cmd_risk(args: argparse.Namespace) -> None:
    """Risk scoring only — no policy evaluation, no evidence recording."""
    scorer = _get_scorer()
    result = scorer.score(_build_context(args))
    if args.json:
        payload = {
            "risk_score": result.value,
            "risk_level": result.level,
            "acceptable": result.is_acceptable,
            "factors": result.factors,
        }
        print(json.dumps(payload, indent=2))
        return
    icon = "🟢" if result.is_acceptable else "🔴"
    print(f"{icon} Risk Score: {result.value}/10 ({result.level})")
    print(f" Acceptable: {'yes' if result.is_acceptable else 'NO'}")
    for factor, detail in result.factors.items():
        print(f"{factor}: {detail.get('value', detail)} (+{detail.get('score', 0)})")
def cmd_policies(args: argparse.Namespace) -> None:
    """List loaded policies with their rules."""
    engine = _get_engine()
    if not engine.policies:
        print("No policies loaded.")
        return
    for policy in engine.policies:
        print(f"📋 {policy.name} (v{policy.version})")
        print(f" {policy.description}")
        print(f" Rules: {len(policy.rules)}")
        for rule in policy.rules:
            # NOTE(review): rule name and effect are concatenated with no
            # separator — a " -> " (or similar) between them was likely lost;
            # confirm against the pre-merge output format.
            print(f"{rule.name}{rule.effect} (priority: {rule.priority})")
        print()
def cmd_status(args: argparse.Namespace) -> None:
    """Show governance system status: paths plus loaded policy/rule counts."""
    engine = _get_engine()
    # NOTE(review): scorer is constructed but never used below — either drop
    # it or report something about it (e.g. its configuration).
    scorer = _get_scorer()
    policies_count = len(engine.policies)
    rules_count = sum(len(p.rules) for p in engine.policies)
    policies_dir = DEFAULT_POLICIES_DIR
    controls_file = DEFAULT_CONTROLS_MAPPING
    print("🛡️ Darkplex Governance Status")
    print(f" Policies dir: {policies_dir}")
    print(f" Controls map: {controls_file}")
    print(f" Policies loaded: {policies_count}")
    print(f" Total rules: {rules_count}")
    # NOTE(review): the exists-indicators are empty strings for both branches —
    # emoji (e.g. ✅/❌) were likely lost in re-encoding; confirm and restore.
    print(f" Policies dir exists: {'' if Path(policies_dir).exists() else ''}")
    print(f" Controls file exists: {'' if Path(controls_file).exists() else ''}")
def cmd_report(args: argparse.Namespace) -> None:
    """Generate a compliance report (placeholder — needs live evidence).

    With --agent a per-agent report is produced; otherwise a full compliance
    report. Output goes to the --output path as a file, or to stdout.
    """
    # Package-qualified imports: after the darkplex-core merge these modules
    # live under cortex/governance/, so bare 'governance.' no longer resolves.
    from cortex.governance.evidence import ControlMapping, EvidenceCollector
    from cortex.governance.report_generator import ReportGenerator

    collector = EvidenceCollector(
        control_mapping=ControlMapping(DEFAULT_CONTROLS_MAPPING)
    )
    generator = ReportGenerator(collector)
    if args.agent:
        report = generator.generate_agent_report(args.agent)
    else:
        report = generator.generate_compliance_report()
    output = json.dumps(report, indent=2)
    if args.output:
        Path(args.output).write_text(output)
        print(f"✅ Report written to {args.output}")
    else:
        print(output)
def main() -> None:
    """CLI entry point: parse arguments and dispatch to the cmd_* handlers."""
    parser = argparse.ArgumentParser(prog="darkplex governance", description="Governance Engine")
    # --json is accepted both before and after the subcommand; the subparsers
    # re-declare it with the same dest, so either position sets args.json.
    parser.add_argument("--json", action="store_true", help="JSON output")
    sub = parser.add_subparsers(dest="subcmd")
    # evaluate — full policy + risk + evidence pipeline
    p_eval = sub.add_parser("evaluate", aliases=["eval"], help="Full policy + risk evaluation")
    p_eval.add_argument("--agent", required=True)
    p_eval.add_argument("--action", required=True)
    p_eval.add_argument("--data-type", default="public", choices=["public", "internal", "confidential", "restricted"])
    p_eval.add_argument("--target", default="internal", choices=["internal", "external"])
    p_eval.add_argument("--role", default="assistant", choices=["admin", "operator", "assistant", "external"])
    p_eval.add_argument("--json", action="store_true", dest="json")
    # risk — scoring only; agent/action default to "unknown" instead of required
    p_risk = sub.add_parser("risk", help="Risk scoring only")
    p_risk.add_argument("--agent", default="unknown")
    p_risk.add_argument("--action", default="unknown")
    p_risk.add_argument("--data-type", default="public", choices=["public", "internal", "confidential", "restricted"])
    p_risk.add_argument("--target", default="internal", choices=["internal", "external"])
    p_risk.add_argument("--role", default="assistant", choices=["admin", "operator", "assistant", "external"])
    p_risk.add_argument("--json", action="store_true", dest="json")
    # policies — list loaded policies
    p_pol = sub.add_parser("policies", help="List loaded policies")
    # NOTE(review): --reload is parsed but never acted upon in cmd_policies.
    p_pol.add_argument("--reload", action="store_true")
    # status — paths and counts
    sub.add_parser("status", help="Show governance status")
    # report — compliance report, optionally scoped to one agent
    p_rep = sub.add_parser("report", help="Generate compliance report")
    p_rep.add_argument("--agent", default=None)
    p_rep.add_argument("--output", "-o", default=None)
    p_rep.add_argument("--json", action="store_true", dest="json")
    args = parser.parse_args()
    # Dispatch; the "eval" alias maps to cmd_evaluate.
    if args.subcmd in ("evaluate", "eval"):
        cmd_evaluate(args)
    elif args.subcmd == "risk":
        cmd_risk(args)
    elif args.subcmd == "policies":
        cmd_policies(args)
    elif args.subcmd == "status":
        cmd_status(args)
    elif args.subcmd == "report":
        cmd_report(args)
    else:
        # No (or unknown) subcommand: show help and exit non-zero.
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,70 @@
# ISO 27001 Annex A Controls → NATS Event Streams Mapping
#
# Maps governance event types to ISO 27001:2022 Annex A controls.
# Used by the Evidence Collector to tag evidence with applicable controls.
version: "1.0.0"
mappings:
  # A.5.1-4 — Organizational controls: information security policies & roles
  - controls:
      - "A.5.1"   # Policies for information security
      - "A.5.2"   # Information security roles and responsibilities
      - "A.5.4"   # Management responsibilities
    event_types:
      - policy_evaluation
      - policy_update
      - policy_violation
    nats_subjects:
      - "governance.policy.>"
  # A.5.10, A.5.12-13 — Acceptable use, classification, labelling
  - controls:
      - "A.5.10"  # Acceptable use of information
      - "A.5.12"  # Classification of information
      - "A.5.13"  # Labelling of information
    event_types:
      - data_access
      - data_classification
      - data_export
    nats_subjects:
      - "governance.data.>"
  # A.8 — Technological controls: endpoints, privileged access, monitoring
  - controls:
      - "A.8.1"   # User endpoint devices
      - "A.8.2"   # Privileged access rights
      - "A.8.5"   # Secure authentication
      - "A.8.9"   # Configuration management
      - "A.8.16"  # Monitoring activities
    event_types:
      - agent_authentication
      - agent_action
      - system_configuration
      - monitoring_alert
    nats_subjects:
      - "governance.agent.>"
      - "governance.system.>"
  # A.5.15-18 — Access control & identity management
  # (2022 numbering; this was Annex A.9 in ISO 27001:2013)
  - controls:
      - "A.5.15"  # Access control
      - "A.5.16"  # Identity management
      - "A.5.17"  # Authentication information
      - "A.5.18"  # Access rights
    event_types:
      - access_request
      - access_granted
      - access_denied
      - role_change
    nats_subjects:
      - "governance.access.>"
  # A.5.23 — Supplier / cloud services
  - controls:
      - "A.5.23"  # Information security for cloud services
    event_types:
      - external_api_call
      - cloud_service_access
    nats_subjects:
      - "governance.external.>"

View file

@ -0,0 +1,129 @@
"""Runtime Enforcer: pre-execution policy check (approve/deny/escalate).
The enforcer is the single entry point for all agent action governance.
It orchestrates the policy engine, risk scorer, and evidence collector.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any
from governance.evidence import EvidenceCollector
from governance.policy import PolicyEngine
from governance.risk_scorer import RiskResult, RiskScorer
logger = logging.getLogger(__name__)
@dataclass
class Decision:
    """The final governance decision for an agent action.

    Attributes:
        verdict: one of "approve", "deny", "escalate"
        reason: human-readable explanation of the verdict
        risk: the RiskResult produced by the risk scorer
        policy_result: raw result dict from the policy engine
    """

    verdict: str
    reason: str
    risk: RiskResult
    policy_result: dict[str, Any]

    @property
    def approved(self) -> bool:
        """True only for an outright approval verdict."""
        return self.verdict == "approve"
class Enforcer:
    """Pre-execution governance enforcer.

    Evaluates every agent action against policies and risk scoring,
    records evidence, and returns a decision.

    Usage:
        enforcer = Enforcer(policy_engine, risk_scorer, evidence_collector)
        decision = enforcer.evaluate({"agent": "claudia", "action": "send_email", ...})
        if decision.approved:
            execute_action()
    """

    # Risk levels that override policy to deny/escalate.
    RISK_OVERRIDES: dict[str, str] = {
        "critical": "deny",
        "high": "escalate",
    }

    def __init__(
        self,
        policy_engine: PolicyEngine | None = None,
        risk_scorer: RiskScorer | None = None,
        evidence_collector: EvidenceCollector | None = None,
    ) -> None:
        # Each collaborator defaults to a freshly-constructed instance so the
        # enforcer works out of the box in tests and simple deployments.
        self.policy_engine = policy_engine or PolicyEngine()
        self.risk_scorer = risk_scorer or RiskScorer()
        self.evidence_collector = evidence_collector or EvidenceCollector()

    def evaluate(self, context: dict[str, Any]) -> Decision:
        """Evaluate an agent action and return a governance decision.

        Args:
            context: Action context dict with keys like:
                - agent: agent identifier
                - action: action name
                - data_type / data_classification: data sensitivity
                - target: internal/external
                - agent_role: role of the requesting agent
                - hour: time of day (optional)

        Returns:
            Decision with verdict, reason, risk score, and policy result.
        """
        # Normalize data_type: data_classification is accepted as an alias.
        if "data_classification" in context and "data_type" not in context:
            context["data_type"] = context["data_classification"]
        # Step 1: Risk scoring
        risk = self.risk_scorer.score(context)
        # Step 2: Policy evaluation
        policy_result = self.policy_engine.evaluate(context)
        policy_verdict = policy_result["verdict"]
        # Step 3: Combine — risk can override policy to be MORE restrictive.
        verdict = policy_verdict
        reason = policy_result["reason"]
        risk_override = self.RISK_OVERRIDES.get(risk.level)
        if risk_override:
            # Lower strictness value = more restrictive. The override only
            # applies when it is stricter than the policy verdict.
            # NOTE(review): this table uses "allow" while Decision documents
            # "approve" — both hit the .get default of 2, so the comparison
            # still behaves, but the verdict vocabulary should be unified.
            # Confirm which term the policy engine actually emits.
            strictness = {"deny": 0, "escalate": 1, "allow": 2}
            if strictness.get(risk_override, 2) < strictness.get(verdict, 2):
                verdict = risk_override
                reason = f"Risk override ({risk.level}): {reason}"
        # Step 4: Record evidence for audit reporting.
        agent = context.get("agent", "unknown")
        action = context.get("action", "unknown")
        self.evidence_collector.record(
            event_type="policy_evaluation",
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk.value,
            risk_level=risk.level,
            details={
                "context": context,
                "policy_result": policy_result,
                "risk_factors": risk.factors,
            },
        )
        decision = Decision(
            verdict=verdict,
            reason=reason,
            risk=risk,
            policy_result=policy_result,
        )
        # NOTE(review): "%s%s" joins agent/action and verdict without a
        # separator — a " -> " was probably lost in re-encoding; confirm.
        logger.info(
            "Enforcer decision: %s%s (risk: %d/%s)",
            f"{agent}/{action}", verdict, risk.value, risk.level,
        )
        return decision

View file

@ -0,0 +1,153 @@
"""Evidence Collector: NATS JetStream events → ISO 27001 control mapping.
Collects governance events from NATS, maps them to ISO 27001 Annex A controls,
and stores evidence for audit reporting.
"""
from __future__ import annotations
import json
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
import yaml
logger = logging.getLogger(__name__)
@dataclass
class EvidenceRecord:
    """A single piece of compliance evidence.

    Captures one governance event together with its verdict, risk assessment,
    and the ISO 27001 Annex A controls it provides evidence for.
    """

    timestamp: str               # ISO-8601 timestamp
    event_type: str              # e.g. policy_evaluation, access_request
    agent: str                   # acting agent identifier
    action: str                  # action that was evaluated
    verdict: str                 # policy verdict for the action
    risk_score: int              # numeric risk score
    risk_level: str              # risk level label
    controls: list[str]          # ISO 27001 control IDs this evidences
    details: dict[str, Any] = field(default_factory=dict)  # extra context

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict of all fields, in declaration order."""
        return dict(
            timestamp=self.timestamp,
            event_type=self.event_type,
            agent=self.agent,
            action=self.action,
            verdict=self.verdict,
            risk_score=self.risk_score,
            risk_level=self.risk_level,
            controls=self.controls,
            details=self.details,
        )
class ControlMapping:
    """Maps governance event types to ISO 27001 Annex A control IDs."""

    def __init__(self, mapping_path: str | None = None) -> None:
        # event_type -> list of control IDs
        self.mapping: dict[str, list[str]] = {}
        resolved = mapping_path or os.environ.get(
            "GOVERNANCE_CONTROLS_MAPPING", "controls/iso27001-mapping.yaml"
        )
        self._load_mapping(resolved)

    def _load_mapping(self, path: str) -> None:
        """Load the event-type -> controls mapping from the YAML file at *path*."""
        try:
            with open(path, "r") as fh:
                data = yaml.safe_load(fh)
            for entry in data.get("mappings", []):
                controls = entry.get("controls", [])
                for event_type in entry.get("event_types", []):
                    self.mapping.setdefault(event_type, []).extend(controls)
            logger.info("Loaded %d event type mappings", len(self.mapping))
        except FileNotFoundError:
            # A missing mapping is tolerated: evidence is recorded untagged.
            logger.warning("Control mapping not found: %s", path)
        except Exception:
            logger.exception("Failed to load control mapping: %s", path)

    def get_controls(self, event_type: str) -> list[str]:
        """Return the ISO 27001 controls applicable to *event_type* ([] if unmapped)."""
        return self.mapping.get(event_type, [])
class EvidenceCollector:
    """Collects and stores governance evidence from agent actions.

    In production this subscribes to NATS JetStream; in tests, evidence is
    recorded directly via record().

    Usage:
        collector = EvidenceCollector()
        collector.record(event_type="policy_evaluation", agent="claudia", ...)
    """

    def __init__(self, control_mapping: ControlMapping | None = None) -> None:
        self.control_mapping = control_mapping or ControlMapping()
        # In-memory evidence store, newest last.
        self.evidence: list[EvidenceRecord] = []

    def record(
        self,
        event_type: str,
        agent: str,
        action: str,
        verdict: str,
        risk_score: int = 0,
        risk_level: str = "low",
        details: dict[str, Any] | None = None,
    ) -> EvidenceRecord:
        """Record one governance evidence entry and return it.

        Args:
            event_type: Type of governance event (e.g. policy_evaluation, access_request)
            agent: Agent identifier
            action: Action being performed
            verdict: Policy verdict (allow/deny/escalate)
            risk_score: Numeric risk score (0-10)
            risk_level: Risk level string
            details: Additional context
        """
        entry = EvidenceRecord(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event_type=event_type,
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk_score,
            risk_level=risk_level,
            # Tag with every ISO 27001 control applicable to this event type.
            controls=self.control_mapping.get_controls(event_type),
            details=details or {},
        )
        self.evidence.append(entry)
        # NOTE(review): "%s%s" prints agent and verdict with no separator —
        # one was probably lost in re-encoding; confirm the intended format.
        logger.info(
            "Evidence recorded: %s by %s%s (risk: %d/%s, controls: %s)",
            action, agent, verdict, risk_score, risk_level, entry.controls,
        )
        return entry

    def get_evidence(
        self,
        agent: str | None = None,
        control: str | None = None,
        verdict: str | None = None,
    ) -> list[EvidenceRecord]:
        """Query evidence, narrowed by any combination of the given filters."""
        selected = self.evidence
        if agent:
            selected = [rec for rec in selected if rec.agent == agent]
        if control:
            selected = [rec for rec in selected if control in rec.controls]
        if verdict:
            selected = [rec for rec in selected if rec.verdict == verdict]
        return selected

    def export_json(self) -> str:
        """Export all evidence as a pretty-printed JSON array."""
        return json.dumps([rec.to_dict() for rec in self.evidence], indent=2)

View file

@ -0,0 +1,46 @@
# Data Access Policy
# Defines who can access which data classifications.
# Effects: allow / deny / escalate (see schema.yaml for the rule structure).
# Restrictive rules carry the highest priority values here.
name: data-access
description: Controls agent access to data based on classification and role
version: "1.0.0"
rules:
  # Hard denials first (priority 100).
  - name: deny-external-restricted
    description: External agents cannot access restricted data
    conditions:
      agent_role: external
      data_type: restricted
    effect: deny
    priority: 100
  - name: escalate-confidential-external
    description: Confidential data going external requires escalation
    conditions:
      data_type: confidential
      target: external
    effect: escalate
    priority: 90
  - name: deny-restricted-external
    description: Restricted data must never leave internal systems
    conditions:
      data_type: restricted
      target: external
    effect: deny
    priority: 100
  # Permissive defaults (low priority).
  - name: allow-public-any
    description: Public data can be accessed by anyone
    conditions:
      data_type: public
    effect: allow
    priority: 10
  - name: allow-internal-internal
    description: Internal data accessible within internal systems
    conditions:
      data_type: internal
      target: internal
    effect: allow
    priority: 50

View file

@ -0,0 +1,40 @@
# External Communications Policy
# Controls when and how agents can communicate externally.
name: external-comms
description: Governs agent communication with external systems and parties
version: "1.0.0"
rules:
- name: deny-assistant-external-email
description: Assistants cannot send external emails without escalation
conditions:
agent_role: assistant
action: send_email
target: external
effect: escalate
priority: 80
- name: allow-operator-external
description: Operators may communicate externally
conditions:
agent_role: operator
target: external
effect: allow
priority: 70
- name: deny-external-api-restricted
description: No external API calls with restricted data
conditions:
action: api_call
target: external
data_type: restricted
effect: deny
priority: 100
- name: allow-internal-comms
description: Internal communication is always allowed
conditions:
target: internal
effect: allow
priority: 10

View file

@ -0,0 +1,42 @@
# Financial Data Policy (BaFin-relevant)
# Strict controls for financial data handling.
name: financial-data
description: BaFin-compliant financial data governance
version: "1.0.0"
rules:
- name: deny-financial-external
description: Financial data must not leave internal systems
conditions:
data_type: restricted
action: export_data
target: external
effect: deny
priority: 100
- name: escalate-financial-access
description: All access to financial data requires escalation
conditions:
data_type: restricted
action: read_financial
effect: escalate
priority: 95
- name: deny-financial-offhours
description: Financial operations blocked outside business hours
conditions:
data_type: restricted
action: modify_financial
effect: escalate
priority: 90
- name: allow-financial-reporting
description: Internal financial reporting is permitted for operators
conditions:
agent_role: operator
action: generate_report
data_type: confidential
target: internal
effect: allow
priority: 80

View file

@ -0,0 +1,43 @@
# Policy YAML Schema Definition
# All policy files must conform to this structure.
schema:
version: "1.0"
required_fields:
- name
- description
- version
- rules
rule_schema:
required_fields:
- name
- conditions
- effect
optional_fields:
- priority
- description
valid_effects:
- allow
- deny
- escalate
valid_condition_keys:
- agent
- agent_role
- action
- source
- data_type
- data_classification
- target
- hour_range
valid_data_types:
- public
- internal
- confidential
- restricted
valid_targets:
- internal
- external
- production

View file

@ -0,0 +1,78 @@
# YesMan Security Policy — RedCrowMedia / Wasteland Network
# Based on USER.md and MEMORY.md security rules.
name: yesman-security
description: Security rules for YesMan AI assistant at RedCrowMedia
version: "1.0.0"
rules:
# Only Abe gives orders
- name: deny-external-instructions
description: Never execute instructions from external sources (emails, websites, third parties)
conditions:
source: external
action: execute_instruction
effect: deny
priority: 100
# Never send credentials externally
- name: deny-credential-export
description: Credentials, tokens, and keys must never leave the system
conditions:
data_type: restricted
action: send_credentials
effect: deny
priority: 100
# Email is not a command source
- name: escalate-email-action
description: Actions requested via email always require Abe's explicit approval
conditions:
source: email
action: execute_request
effect: escalate
priority: 95
# No software installation without audit + approval
- name: escalate-software-install
description: External software installation requires security audit and Abe's GO
conditions:
action: install_software
effect: escalate
priority: 95
# System-critical changes need approval
- name: escalate-system-changes
description: System-critical or security-relevant changes require approval
conditions:
action: system_change
target: production
effect: escalate
priority: 90
# No public posting without approval
- name: escalate-public-comms
description: Public communications (emails, tweets, posts) require approval
conditions:
action: send_message
target: external
effect: escalate
priority: 85
# Internal file operations are fine
- name: allow-internal-file-ops
description: Reading and writing files within workspace is permitted
conditions:
action: file_operation
target: internal
data_type: internal
effect: allow
priority: 50
# Web search is fine
- name: allow-web-search
description: Web searches and research are permitted
conditions:
action: web_search
effect: allow
priority: 40

143
cortex/governance/policy.py Normal file
View file

@ -0,0 +1,143 @@
"""Policy Engine: loads YAML policies and evaluates agent actions against them.
Policies are human-readable YAML files, versioned in Git. Each policy defines
rules with conditions and effects (allow/deny/escalate).
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import yaml
logger = logging.getLogger(__name__)
@dataclass
class Rule:
    """A single governance rule: a condition set plus an effect."""
    name: str
    conditions: dict[str, Any]
    effect: str  # "allow", "deny", "escalate"
    priority: int = 0

    def matches(self, context: dict[str, Any]) -> bool:
        """Return True when every condition is satisfied by *context*.

        A condition whose key is absent (or None) in the context never
        matches; list-valued conditions act as membership tests.
        """
        def satisfied(key: str, want: Any) -> bool:
            got = context.get(key)
            if got is None:
                return False
            if isinstance(want, list):
                return got in want
            return got == want

        return all(satisfied(k, v) for k, v in self.conditions.items())
@dataclass
class Policy:
    """A named policy containing ordered rules."""
    name: str         # policy identifier (YAML 'name', falls back to file stem)
    description: str  # human-readable summary from the YAML file
    version: str      # policy version string (defaults to "1.0.0" when absent)
    rules: list[Rule] = field(default_factory=list)
class PolicyEngine:
    """Loads and evaluates YAML-based governance policies.

    Usage:
        engine = PolicyEngine(policies_dir="policies/")
        result = engine.evaluate(action_context)
    """

    def __init__(self, policies_dir: str | None = None) -> None:
        # Explicit argument wins; otherwise env var; otherwise "policies/".
        base = policies_dir or os.environ.get("GOVERNANCE_POLICIES_DIR", "policies/")
        self.policies_dir = Path(base)
        self.policies: list[Policy] = []
        self._load_policies()

    def _load_policies(self) -> None:
        """Load all YAML policy files from the policies directory."""
        if not self.policies_dir.exists():
            logger.warning("Policies directory not found: %s", self.policies_dir)
            return
        # Deterministic load order; schema.yaml describes policies, it isn't one.
        for path in sorted(self.policies_dir.glob("*.yaml")):
            if path.name == "schema.yaml":
                continue
            try:
                loaded = self._parse_policy(path)
                self.policies.append(loaded)
                logger.info("Loaded policy: %s (%d rules)", loaded.name, len(loaded.rules))
            except Exception:
                logger.exception("Failed to load policy: %s", path)

    def _parse_policy(self, path: Path) -> Policy:
        """Parse one YAML file into a Policy object."""
        with open(path, "r") as fh:
            raw = yaml.safe_load(fh)
        parsed_rules = [
            Rule(
                name=entry["name"],
                conditions=entry.get("conditions", {}),
                effect=entry.get("effect", "deny"),   # fail closed by default
                priority=entry.get("priority", 0),
            )
            for entry in raw.get("rules", [])
        ]
        return Policy(
            name=raw.get("name", path.stem),
            description=raw.get("description", ""),
            version=raw.get("version", "1.0.0"),
            rules=parsed_rules,
        )

    def evaluate(self, context: dict[str, Any]) -> dict[str, Any]:
        """Evaluate an action context against all loaded policies.

        Returns the highest-priority matching rule's effect, or 'allow' if
        no rules match. Ties on priority are broken by strictness
        (deny > escalate > allow).
        """
        hits: list[tuple[Rule, Policy]] = [
            (rule, policy)
            for policy in self.policies
            for rule in policy.rules
            if rule.matches(context)
        ]
        if not hits:
            return {
                "verdict": "allow",
                "reason": "No matching policy rules",
                "matched_rules": [],
            }
        strictness = {"deny": 0, "escalate": 1, "allow": 2}
        hits.sort(key=lambda pair: (-pair[0].priority, strictness.get(pair[0].effect, 2)))
        winner, owner = hits[0]
        return {
            "verdict": winner.effect,
            "reason": f"Policy '{owner.name}', rule '{winner.name}'",
            "matched_rules": [
                {"policy": p.name, "rule": r.name, "effect": r.effect}
                for r, p in hits
            ],
        }

    def reload(self) -> None:
        """Reload all policies from disk."""
        self.policies.clear()
        self._load_policies()

View file

@ -0,0 +1,109 @@
"""Audit Report Generator: creates compliance reports from collected evidence.
Generates structured reports grouped by ISO 27001 controls, time periods,
and agent activity.
"""
from __future__ import annotations
import json
import logging
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
from governance.evidence import EvidenceCollector, EvidenceRecord
logger = logging.getLogger(__name__)
@dataclass
class ReportSection:
    """A section of an audit report."""
    title: str
    entries: list[dict[str, Any]]
    summary: dict[str, Any]
    # NOTE(review): ReportGenerator builds sections as plain dicts and never
    # instantiates this class — confirm intent before removing it.
class ReportGenerator:
    """Generates audit reports from governance evidence.

    Usage:
        generator = ReportGenerator(evidence_collector)
        report = generator.generate_compliance_report()
    """

    def __init__(self, evidence_collector: EvidenceCollector) -> None:
        self.collector = evidence_collector

    def generate_compliance_report(self) -> dict[str, Any]:
        """Generate a full compliance report grouped by ISO 27001 controls."""
        records = self.collector.evidence
        if not records:
            return {"generated_at": _now_iso(), "status": "no_evidence", "sections": []}

        # Bucket every record under each control it touches (records may
        # carry several controls, so buckets can overlap).
        grouped: dict[str, list[EvidenceRecord]] = defaultdict(list)
        for rec in records:
            for ctrl in rec.controls:
                grouped[ctrl].append(rec)

        sections = [
            {
                "control": ctrl,
                "total_events": len(grouped[ctrl]),
                "verdicts": _count_verdicts(grouped[ctrl]),
                "risk_distribution": _count_risk_levels(grouped[ctrl]),
                "agents": list({r.agent for r in grouped[ctrl]}),
            }
            for ctrl in sorted(grouped)
        ]
        return {
            "generated_at": _now_iso(),
            "total_evidence": len(records),
            "controls_covered": list(sorted(grouped.keys())),
            "summary": {
                # sum() over booleans counts the matching records
                "total_deny": sum(r.verdict == "deny" for r in records),
                "total_escalate": sum(r.verdict == "escalate" for r in records),
                "total_allow": sum(r.verdict == "allow" for r in records),
                "high_risk_events": sum(r.risk_score >= 7 for r in records),
            },
            "sections": sections,
        }

    def generate_agent_report(self, agent: str) -> dict[str, Any]:
        """Generate a report for a specific agent."""
        records = self.collector.get_evidence(agent=agent)
        return {
            "generated_at": _now_iso(),
            "agent": agent,
            "total_actions": len(records),
            "verdicts": _count_verdicts(records),
            "risk_distribution": _count_risk_levels(records),
            "actions": [r.to_dict() for r in records],
        }

    def export_json(self) -> str:
        """Export the compliance report as formatted JSON."""
        return json.dumps(self.generate_compliance_report(), indent=2)
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _count_verdicts(records: list[EvidenceRecord]) -> dict[str, int]:
counts: dict[str, int] = defaultdict(int)
for r in records:
counts[r.verdict] += 1
return dict(counts)
def _count_risk_levels(records: list[EvidenceRecord]) -> dict[str, int]:
counts: dict[str, int] = defaultdict(int)
for r in records:
counts[r.risk_level] += 1
return dict(counts)

View file

@ -0,0 +1,126 @@
"""Risk Scorer: context-based risk scoring for agent actions.
Risk levels:
- low (0-3): routine operations
- elevated (4-6): notable but acceptable
- high (7-8): requires escalation
- critical (9-10): auto-deny + alert
Factors: data classification, target (internal/external), agent role, time of day.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
logger = logging.getLogger(__name__)
# Data classification weights — more sensitive data adds more risk points.
DATA_WEIGHTS: dict[str, int] = {
    "public": 0,
    "internal": 2,
    "confidential": 5,
    "restricted": 8,
}

# Target weights — external exposure raises risk.
TARGET_WEIGHTS: dict[str, int] = {
    "internal": 0,
    "external": 3,
}

# Agent role weights (lower = more trusted); admins subtract a point.
ROLE_WEIGHTS: dict[str, int] = {
    "admin": -1,
    "operator": 0,
    "assistant": 1,
    "external": 3,
}

# Flat bonus added outside business hours (before 08:00 or from 18:00, UTC
# when the caller doesn't supply an hour — see RiskScorer.score).
OFF_HOURS_BONUS = 2
@dataclass
class RiskResult:
    """Result of a risk assessment."""
    value: int             # clamped 0-10 total score
    level: str             # low | elevated | high | critical
    factors: dict[str, Any]  # per-factor score breakdown

    @property
    def is_acceptable(self) -> bool:
        """True for scores that do not require escalation (low/elevated)."""
        return not self.value > 6
def _classify_level(score: int) -> str:
"""Map a numeric score to a risk level."""
if score <= 3:
return "low"
elif score <= 6:
return "elevated"
elif score <= 8:
return "high"
else:
return "critical"
class RiskScorer:
    """Calculates contextual risk scores for agent actions.

    Usage:
        scorer = RiskScorer()
        result = scorer.score({"data_type": "confidential", "target": "external"})
    """

    def score(self, context: dict[str, Any]) -> RiskResult:
        """Score an action context and return a RiskResult.

        Args:
            context: Dict with optional keys:
                - data_type: public|internal|confidential|restricted
                - target: internal|external
                - agent_role: admin|operator|assistant|external
                - hour: 0-23 (defaults to current hour UTC)
        """
        # Resolve inputs with the same defaults as documented above.
        data_type = context.get("data_type", "public")
        target = context.get("target", "internal")
        role = context.get("agent_role", "assistant")
        hour = context.get("hour")
        if hour is None:
            hour = datetime.now(timezone.utc).hour

        data_score = DATA_WEIGHTS.get(data_type, 0)
        target_score = TARGET_WEIGHTS.get(target, 0)
        role_score = ROLE_WEIGHTS.get(role, 1)  # unknown roles score like assistants
        is_off_hours = hour < 8 or hour >= 18
        time_score = OFF_HOURS_BONUS if is_off_hours else 0

        breakdown: dict[str, Any] = {
            "data_type": {"value": data_type, "score": data_score},
            "target": {"value": target, "score": target_score},
            "agent_role": {"value": role, "score": role_score},
            "time_of_day": {"hour": hour, "off_hours": is_off_hours, "score": time_score},
        }

        # Clamp the additive total into the 0-10 band before classifying.
        clamped = max(0, min(10, data_score + target_score + role_score + time_score))
        level = _classify_level(clamped)
        logger.debug("Risk score: %d (%s) — factors: %s", clamped, level, breakdown)
        return RiskResult(value=clamped, level=level, factors=breakdown)

View file

View file

@ -0,0 +1,193 @@
"""Proactive Intelligence: pattern-based predictions and anticipation.
Detects patterns in historical events and generates proactive alerts:
- SSL certificate expiry approaching
- Recurring issues (same error pattern at predictable intervals)
- Usage pattern anomalies
- Resource exhaustion trends
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Any, Callable
logger = logging.getLogger(__name__)
class AlertSeverity(Enum):
    """Severity of a proactive alert; ordering for output is applied in Anticipator.analyze."""
    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"
@dataclass
class Prediction:
    """A proactive prediction about a future event."""
    pattern_name: str   # name of the pattern that produced this prediction
    description: str    # human-readable summary
    severity: AlertSeverity
    predicted_time: datetime | None = None  # when the event is expected, if known
    confidence: float = 0.0  # 0.0-1.0
    recommended_action: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)  # pattern-specific details
@dataclass
class PatternDefinition:
    """Definition of a detectable pattern."""
    name: str
    description: str
    # Receives the full event list; returns a Prediction, or None when nothing matches.
    detector: Callable[[list[dict[str, Any]]], Prediction | None]
class Anticipator:
    """Proactive intelligence engine that detects patterns and generates predictions.

    Usage:
        anticipator = Anticipator()
        anticipator.register_pattern(ssl_expiry_pattern)
        predictions = anticipator.analyze(events)
    """

    def __init__(self) -> None:
        self.patterns: list[PatternDefinition] = []
        self._register_builtin_patterns()

    def register_pattern(self, pattern: PatternDefinition) -> None:
        """Add *pattern* to the active detector set."""
        self.patterns.append(pattern)
        logger.info("Registered pattern: %s", pattern.name)

    def analyze(self, events: list[dict[str, Any]]) -> list[Prediction]:
        """Analyze events against all registered patterns.

        Args:
            events: List of event dicts with at minimum 'timestamp', 'type', 'data'.

        Returns:
            List of predictions, sorted by severity (critical first),
            then by descending confidence.
        """
        found: list[Prediction] = []
        for pattern in self.patterns:
            try:
                hit = pattern.detector(events)
            except Exception:
                # One broken detector must not abort the whole analysis.
                logger.exception("Pattern detector failed: %s", pattern.name)
                continue
            if hit is None:
                continue
            found.append(hit)
            logger.info(
                "Pattern detected: %s (severity: %s, confidence: %.2f)",
                hit.pattern_name,
                hit.severity.value,
                hit.confidence,
            )
        rank = {AlertSeverity.CRITICAL: 0, AlertSeverity.WARNING: 1, AlertSeverity.INFO: 2}
        found.sort(key=lambda p: (rank.get(p.severity, 3), -p.confidence))
        return found

    def _register_builtin_patterns(self) -> None:
        """Register built-in pattern detectors."""
        builtins = (
            ("ssl_cert_expiry", "Detects SSL certificates approaching expiry", _detect_ssl_expiry),
            ("recurring_error", "Detects recurring error patterns", _detect_recurring_errors),
            ("usage_spike", "Detects unusual usage spikes", _detect_usage_spike),
        )
        for name, desc, fn in builtins:
            self.register_pattern(PatternDefinition(name=name, description=desc, detector=fn))
def _detect_ssl_expiry(events: list[dict[str, Any]]) -> Prediction | None:
    """Flag the first SSL certificate that will expire within 14 days."""
    now = datetime.now(timezone.utc)
    horizon = timedelta(days=14)
    for ev in events:
        if ev.get("type") != "ssl_cert_check":
            continue
        raw_expiry = ev.get("data", {}).get("expiry")
        if not raw_expiry:
            continue
        try:
            expiry = datetime.fromisoformat(raw_expiry)
        except (ValueError, TypeError):
            continue
        if expiry.tzinfo is None:
            # Naive timestamps are treated as UTC.
            expiry = expiry.replace(tzinfo=timezone.utc)
        remaining = expiry - now
        if remaining >= horizon:
            continue
        domain = ev.get("data", {}).get("domain", "unknown")
        severity = AlertSeverity.CRITICAL if remaining.days < 3 else AlertSeverity.WARNING
        return Prediction(
            pattern_name="ssl_cert_expiry",
            description=f"SSL certificate for {domain} expires in {remaining.days} days",
            severity=severity,
            predicted_time=expiry,
            confidence=0.95,
            recommended_action=f"Renew SSL certificate for {domain}",
            metadata={"domain": domain, "days_remaining": remaining.days},
        )
    return None
def _detect_recurring_errors(events: list[dict[str, Any]]) -> Prediction | None:
    """Flag the first error type that appears 3 or more times."""
    tallies: dict[str, int] = {}
    for ev in events:
        if ev.get("type") != "error":
            continue
        kind = ev.get("data", {}).get("error_type", "unknown")
        tallies[kind] = tallies.get(kind, 0) + 1
    for error_type, count in tallies.items():
        if count < 3:
            continue
        return Prediction(
            pattern_name="recurring_error",
            description=f"Recurring error '{error_type}' detected ({count} occurrences)",
            severity=AlertSeverity.WARNING,
            # Confidence grows with occurrences but saturates at 0.95.
            confidence=min(0.5 + count * 0.1, 0.95),
            recommended_action=f"Investigate root cause of '{error_type}'",
            metadata={"error_type": error_type, "count": count},
        )
    return None
def _detect_usage_spike(events: list[dict[str, Any]]) -> Prediction | None:
    """Flag when the average of the last 3 usage samples exceeds 2x the overall average."""
    samples = [
        ev.get("data", {}).get("value", 0)
        for ev in events
        if ev.get("type") == "usage_metric"
    ]
    if len(samples) < 10:
        # Too little history to call anything a spike.
        return None
    overall_avg = sum(samples) / len(samples)
    tail = samples[-3:] if len(samples) >= 3 else samples
    tail_avg = sum(tail) / len(tail) if tail else 0
    if overall_avg > 0 and tail_avg > overall_avg * 2:
        return Prediction(
            pattern_name="usage_spike",
            description=f"Usage spike detected: recent avg {tail_avg:.1f} vs overall {overall_avg:.1f}",
            severity=AlertSeverity.WARNING,
            confidence=0.7,
            recommended_action="Investigate usage spike — potential anomaly or load increase",
            metadata={"average": overall_avg, "recent_average": tail_avg, "ratio": tail_avg / overall_avg},
        )
    return None

View file

@ -0,0 +1,154 @@
"""Collective Learning: aggregates patterns across all internal agents.
Subscribes to the shared memory bus, collects insights from all
Vainplex-internal agents, and builds an aggregated knowledge base
for pattern detection and cross-agent learning.
🚨 STRICT DATA ISOLATION: Only Vainplex-internal agents participate.
No customer data. No customer agent insights. Ever.
"""
from __future__ import annotations
import json
import logging
import os
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from cortex.intelligence.shared_memory import ALLOWED_AGENTS, Insight, SharedMemory
logger = logging.getLogger(__name__)
@dataclass
class AggregatedPattern:
    """A pattern detected across multiple agents."""
    topic: str                      # shared-memory topic the insights arrived on
    description: str                # human-readable summary of the pattern
    contributing_agents: list[str]  # distinct internal agents that reported it
    confidence: float               # mean confidence of the contributing insights
    occurrence_count: int           # number of insights backing the pattern
    first_seen: str                 # earliest insight timestamp
    last_seen: str                  # latest insight timestamp
    insights: list[Insight] = field(default_factory=list)  # raw supporting insights
class CollectiveLearning:
    """Aggregates patterns from all internal agents into collective knowledge.

    Usage:
        collective = CollectiveLearning(shared_memory)
        await collective.start()
        patterns = collective.get_patterns()

    DATA ISOLATION: Only processes insights from ALLOWED_AGENTS.
    """
    def __init__(self, shared_memory: SharedMemory) -> None:
        self.shared_memory = shared_memory
        # topic -> every insight received on that topic (internal agents only)
        self._insights_by_topic: dict[str, list[Insight]] = defaultdict(list)
        # Latest analysis result; fully rebuilt on each incoming insight.
        self._patterns: list[AggregatedPattern] = []
    async def start(self) -> None:
        """Start listening for insights on all topics."""
        # ">" presumably acts as a match-everything wildcard subject —
        # TODO confirm against SharedMemory.subscribe semantics.
        await self.shared_memory.subscribe(">", self._handle_insight)
        logger.info("Collective learning started — listening for insights")
    async def _handle_insight(self, insight: Insight) -> None:
        """Process an incoming insight."""
        # Double-check data isolation: drop anything not from an internal agent.
        if insight.agent not in ALLOWED_AGENTS:
            logger.warning("Rejected insight from non-internal agent: %s", insight.agent)
            return
        self._insights_by_topic[insight.topic].append(insight)
        logger.debug(
            "Collected insight: %s from %s (topic: %s)",
            insight.content[:60], insight.agent, insight.topic,
        )
        # Re-analyze patterns when new data arrives
        self._detect_patterns()
    def _detect_patterns(self) -> None:
        """Analyze collected insights to find cross-agent patterns."""
        new_patterns: list[AggregatedPattern] = []
        for topic, insights in self._insights_by_topic.items():
            if len(insights) < 2:
                continue
            agents = list({i.agent for i in insights})
            if len(agents) < 2:
                # Single-agent observations aren't "collective" patterns
                continue
            timestamps = sorted(i.timestamp for i in insights)
            avg_confidence = sum(i.confidence for i in insights) / len(insights)
            pattern = AggregatedPattern(
                topic=topic,
                description=f"Cross-agent pattern on '{topic}' observed by {', '.join(agents)}",
                contributing_agents=agents,
                confidence=avg_confidence,
                occurrence_count=len(insights),
                first_seen=timestamps[0],
                last_seen=timestamps[-1],
                insights=insights,
            )
            new_patterns.append(pattern)
        # Replace wholesale rather than mutate, so readers never see a
        # half-updated list.
        self._patterns = new_patterns
    def get_patterns(
        self,
        topic: str | None = None,
        min_confidence: float = 0.0,
    ) -> list[AggregatedPattern]:
        """Retrieve detected collective patterns.

        Args:
            topic: Filter by topic (optional).
            min_confidence: Minimum confidence threshold.
        """
        patterns = self._patterns
        if topic:
            patterns = [p for p in patterns if p.topic == topic]
        if min_confidence > 0:
            patterns = [p for p in patterns if p.confidence >= min_confidence]
        return patterns
    def get_topic_summary(self) -> dict[str, Any]:
        """Get a summary of all topics and their insight counts."""
        return {
            topic: {
                "count": len(insights),
                "agents": list({i.agent for i in insights}),
                # Defensive: topics only exist once an insight arrived, so
                # `insights` should never be empty here.
                "latest": max(i.timestamp for i in insights) if insights else None,
            }
            for topic, insights in self._insights_by_topic.items()
        }
    def export_knowledge(self) -> str:
        """Export collective knowledge as JSON (patterns + topic summary)."""
        return json.dumps({
            "exported_at": datetime.now(timezone.utc).isoformat(),
            "allowed_agents": sorted(ALLOWED_AGENTS),
            "patterns": [
                {
                    "topic": p.topic,
                    "description": p.description,
                    "contributing_agents": p.contributing_agents,
                    "confidence": p.confidence,
                    "occurrence_count": p.occurrence_count,
                    "first_seen": p.first_seen,
                    "last_seen": p.last_seen,
                }
                for p in self._patterns
            ],
            "topics": self.get_topic_summary(),
        }, indent=2)

View file

@ -0,0 +1,420 @@
#!/usr/bin/env python3
"""Knowledge graph cleanup: classify unknowns, deduplicate entities, score relationships.
Usage:
darkplex cleanup [--classify] [--dedupe] [--score] [--dry-run]
If no flags given, runs all three steps.
"""
import argparse
import copy
import json
import logging
import os
import shutil
import sys
import time
from collections import defaultdict
from datetime import datetime, timedelta
from pathlib import Path
import requests
log = logging.getLogger("knowledge_cleanup")

# Knowledge-graph store locations under the user's home directory.
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
ENTITIES_PATH = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_PATH = KNOWLEDGE_DIR / "relationships.json"

# Local Ollama endpoint and model used for LLM-backed classification.
OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "qwen2.5:7b"

# Entity categories the LLM is allowed to assign (see classify_unknowns).
VALID_TYPES = {"person", "organization", "company", "project", "technology",
               "location", "event", "concept", "product"}
def backup(path: Path) -> Path:
    """Copy *path* next to itself with a timestamped suffix and return the copy."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    dest = path.with_suffix(f".backup_{stamp}.json")
    shutil.copy2(path, dest)  # copy2 preserves metadata
    log.info(f"Backed up {path.name}{dest.name}")
    return dest
def atomic_write(path: Path, data):
    """Serialize *data* as JSON to a sibling temp file, then atomically swap it in."""
    scratch = path.with_suffix(".tmp")
    scratch.write_text(json.dumps(data, ensure_ascii=False, indent=2))
    scratch.replace(path)  # same-directory rename: atomic on POSIX
    log.info(f"Wrote {path.name}")
def load_entities() -> dict:
    """Read the entities store from disk."""
    return json.loads(ENTITIES_PATH.read_text())


def load_relationships() -> dict:
    """Read the relationships store from disk."""
    return json.loads(RELATIONSHIPS_PATH.read_text())
def ollama_generate(prompt: str, timeout: int = 120) -> str:
    """Run *prompt* through the local Ollama model and return the response text.

    Raises requests.HTTPError on non-2xx status.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        # Low temperature: we want deterministic classification, not creativity.
        "options": {"temperature": 0.1, "num_predict": 2000},
    }
    reply = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=timeout)
    reply.raise_for_status()
    return reply.json().get("response", "")
# ─── Task 1: Classify Unknowns ───────────────────────────────────────────────
def classify_unknowns(entities: dict, dry_run: bool = False) -> dict:
    """Classify entities with type='unknown' using LLM.

    Sends entity names to Ollama in batches of 50, parses the JSON mapping
    the model returns, and applies valid categories in place (unless
    dry_run). Returns the (possibly mutated) entities dict.
    """
    unknowns = {k: v for k, v in entities.items()
                if isinstance(v, dict) and v.get("type") == "unknown"}
    if not unknowns:
        log.info("No unknown entities to classify.")
        return entities
    log.info(f"Classifying {len(unknowns)} unknown entities...")
    names = list(unknowns.keys())
    batch_size = 50
    results = {}
    for i in range(0, len(names), batch_size):
        batch = names[i:i + batch_size]
        batch_num = i // batch_size + 1
        total_batches = (len(names) + batch_size - 1) // batch_size  # ceil division
        log.info(f"Batch {batch_num}/{total_batches} ({len(batch)} entities)")
        # Number each name so the model can answer with index -> category.
        numbered = "\n".join(f"{j+1}. {name}" for j, name in enumerate(batch))
        prompt = f"""Classify each entity name into exactly one category.
Categories: person, organization, company, project, technology, location, event, concept, product
If a name looks like a person's first name only (e.g. "sarah", "thomas"), classify as person.
If it's a common word that isn't clearly an entity (e.g. "ahnung", "wir", "evtl", "schau"), classify as concept.
If unsure, classify as concept.
Respond with ONLY a JSON object mapping the number to the category. Example:
{{"1": "person", "2": "company", "3": "concept"}}
Entities:
{numbered}
JSON:"""
        try:
            response = ollama_generate(prompt)
            # Extract JSON from response: take the outermost {...} span in
            # case the model wraps it in prose.
            start = response.find("{")
            end = response.rfind("}") + 1
            if start >= 0 and end > start:
                parsed = json.loads(response[start:end])
                for idx_str, category in parsed.items():
                    idx = int(idx_str) - 1
                    if 0 <= idx < len(batch):
                        cat = category.strip().lower()
                        # Silently drop categories outside the whitelist.
                        if cat in VALID_TYPES:
                            results[batch[idx]] = cat
        except Exception as e:
            log.warning(f"Batch {batch_num} failed: {e}")
            continue  # NOTE: also skips the courtesy sleep below on failure
        time.sleep(0.5)  # Be nice to Ollama
    # Apply results
    stats = defaultdict(int)
    for name, new_type in results.items():
        old_type = entities[name].get("type", "unknown")
        if old_type != new_type:
            stats[f"{old_type}{new_type}"] += 1
            if not dry_run:
                entities[name]["type"] = new_type
                # Provenance fields so later audits can tell LLM-assigned types apart.
                entities[name]["classified_by"] = "llm_cleanup"
                entities[name]["classified_at"] = datetime.now().isoformat()
    log.info(f"Classified {len(results)}/{len(unknowns)} unknowns:")
    for transition, count in sorted(stats.items(), key=lambda x: -x[1]):
        log.info(f" {transition}: {count}")
    remaining = sum(1 for k, v in entities.items()
                    if isinstance(v, dict) and v.get("type") == "unknown")
    log.info(f"Remaining unknowns: {remaining}")
    return entities
# ─── Task 2: Deduplicate ─────────────────────────────────────────────────────
def find_duplicates(entities: dict) -> list:
    """Find duplicate entity groups via case-insensitive matching.

    Two signals are combined:
      1. exact matches after strip+lowercase normalization,
      2. word-boundary prefix containment (e.g. "mondo gate" vs "mondo gate ag").
    Returns a list of groups (lists of original names) to be merged.
    """
    # Group by normalized name
    groups = defaultdict(list)
    for name in entities:
        normalized = name.strip().lower()
        groups[normalized].append(name)
    # Also check for substring containment (e.g. "mondo gate" vs "mondo gate ag")
    names_lower = {name: name.strip().lower() for name in entities}
    sorted_names = sorted(names_lower.items(), key=lambda x: len(x[1]))  # shortest first
    # Find names where one is a prefix/substring of another.
    # NOTE(review): O(n^2) pairwise scan — acceptable for small graphs,
    # slow for very large entity sets.
    substring_pairs = []
    for i, (name_a, low_a) in enumerate(sorted_names):
        if len(low_a) < 3:
            continue  # too short to be a meaningful prefix
        for name_b, low_b in sorted_names[i+1:]:
            if low_a == low_b:
                continue
            # Require a following space/hyphen so "mond" doesn't match "mondo".
            if low_b.startswith(low_a + " ") or low_b.startswith(low_a + "-"):
                substring_pairs.append((name_a, name_b))
    # Build merge groups
    merge_groups = []
    # Exact case duplicates
    for normalized, names in groups.items():
        if len(names) > 1:
            merge_groups.append(names)
    # Substring matches (merge into existing groups or create new).
    # NOTE(review): a pair joins the FIRST group containing either name, so
    # two distinct groups sharing a member are not unified transitively —
    # confirm whether that is intended.
    for short, long in substring_pairs:
        found = False
        for group in merge_groups:
            if short in group or long in group:
                if short not in group:
                    group.append(short)
                if long not in group:
                    group.append(long)
                found = True
                break
        if not found:
            merge_groups.append([short, long])
    return merge_groups
def pick_canonical(names: list, entities: dict) -> str:
    """Pick the most detailed entity name as canonical.

    Preference order: names containing uppercase letters, then entries
    with more fields, then longer names.
    """
    def rank(candidate):
        entry = entities.get(candidate, {})
        field_count = len(entry) if isinstance(entry, dict) else 0
        has_upper = int(any(ch.isupper() for ch in candidate))
        return (has_upper, field_count, len(candidate))

    return max(names, key=rank)
def deduplicate(entities: dict, relationships: dict, dry_run: bool = False) -> tuple:
    """Deduplicate entities and update relationships.

    Merges each duplicate group into a canonical entry (see pick_canonical),
    records the dropped names as aliases, and rewrites relationship
    endpoints to the canonical names, merging collided relationship keys.
    Returns the (entities, relationships) pair.
    """
    groups = find_duplicates(entities)
    if not groups:
        log.info("No duplicates found.")
        return entities, relationships
    log.info(f"Found {len(groups)} duplicate groups:")
    alias_map = {}  # old_name → canonical_name
    for group in groups:
        canonical = pick_canonical(group, entities)
        aliases = [n for n in group if n != canonical]
        if not aliases:
            continue
        log.info(f" Canonical: '{canonical}' ← aliases: {aliases}")
        for alias in aliases:
            alias_map[alias] = canonical
        if not dry_run:
            # Merge fields into canonical
            canonical_entry = entities.get(canonical, {})
            if not isinstance(canonical_entry, dict):
                canonical_entry = {}
            existing_aliases = canonical_entry.get("aliases", [])
            for alias in aliases:
                if alias not in existing_aliases:
                    existing_aliases.append(alias)
                alias_entry = entities.get(alias, {})
                if isinstance(alias_entry, dict):
                    # Merge non-existing fields: canonical values win; the
                    # alias's own type/aliases are never copied over.
                    for k, v in alias_entry.items():
                        if k not in canonical_entry and k not in ("type", "aliases"):
                            canonical_entry[k] = v
            canonical_entry["aliases"] = existing_aliases
            entities[canonical] = canonical_entry
            # Remove aliases from entities
            for alias in aliases:
                if alias in entities:
                    del entities[alias]
    # Update relationships
    if not dry_run and alias_map:
        updated_rels = {}
        remapped = 0
        for key, rel in relationships.items():
            a = rel.get("a", "")
            b = rel.get("b", "")
            new_a = alias_map.get(a, a)
            new_b = alias_map.get(b, b)
            if new_a != a or new_b != b:
                remapped += 1
                rel["a"] = new_a
                rel["b"] = new_b
            new_key = f"{new_a}::{new_b}"
            if new_key in updated_rels:
                # Merge: sum counts, keep latest last_seen.
                # ISO-8601 timestamp strings compare correctly lexicographically.
                existing = updated_rels[new_key]
                existing["count"] = existing.get("count", 0) + rel.get("count", 0)
                if rel.get("last_seen", "") > existing.get("last_seen", ""):
                    existing["last_seen"] = rel["last_seen"]
                # NOTE(review): if the kept record lacks 'first_seen' ("" default),
                # this comparison never fires and first_seen stays missing — confirm.
                if rel.get("first_seen", "") < existing.get("first_seen", ""):
                    existing["first_seen"] = rel["first_seen"]
                # Merge types
                existing_types = set(existing.get("types", []))
                existing_types.update(rel.get("types", []))
                existing["types"] = list(existing_types)
            else:
                updated_rels[new_key] = rel
        log.info(f"Remapped {remapped} relationships, merged {len(relationships) - len(updated_rels)} duplicates")
        relationships = updated_rels
    log.info(f"Merged {len(alias_map)} aliases into {len(set(alias_map.values()))} canonical entities")
    return entities, relationships
# ─── Task 3: Relationship Scoring ────────────────────────────────────────────
def score_relationships(relationships: dict, dry_run: bool = False) -> dict:
    """Add strength scores to relationships and prune the weakest.

    strength = 0.4 * count_score + 0.3 * diversity_score + 0.3 * recency_score
      - count_score: log-scaled occurrence count, saturating near count=99
      - diversity_score: 0.3 per distinct relationship type, capped at 1.0
      - recency_score: 1.0 within 30 days, then linear decay over ~180 days

    Relationships scoring below 0.1 are removed (unless dry_run).
    Returns the (possibly mutated) relationships dict.

    Fixes vs. previous version: `import math` was executed inside the loop
    (hoisted here); unused locals `decay_threshold` and `first_seen_str` removed.
    """
    import math  # local import kept to match the module's lazy-import style

    now = datetime.now()
    removed = 0
    scored = 0
    decayed = 0
    to_remove = []
    for key, rel in relationships.items():
        count = rel.get("count", 1)
        last_seen_str = rel.get("last_seen", "")
        types = rel.get("types", [])
        # Base strength from count (log scale, capped at 1)
        count_score = min(1.0, math.log(count + 1) / math.log(100))
        # Context diversity: more relationship types = stronger
        diversity_score = min(1.0, len(types) * 0.3)
        # Recency score: full strength for 30 days, then linear decay.
        recency_score = 1.0
        if last_seen_str:
            try:
                last_seen = datetime.fromisoformat(last_seen_str)
                # TypeError is caught below if last_seen is tz-aware while
                # `now` is naive.
                days_ago = (now - last_seen).days
                if days_ago > 30:
                    recency_score = max(0.0, 1.0 - (days_ago - 30) / 180)
                    decayed += 1
            except (ValueError, TypeError):
                pass  # unparsable timestamp: treat as fully recent
        # Combined strength
        strength = round(
            count_score * 0.4 + diversity_score * 0.3 + recency_score * 0.3,
            3
        )
        if strength < 0.1:
            to_remove.append(key)
            removed += 1
        else:
            if not dry_run:
                rel["strength"] = strength
            scored += 1
    if not dry_run:
        for key in to_remove:
            del relationships[key]
    log.info(f"Scored {scored} relationships, decayed {decayed}, removed {removed} (strength < 0.1)")
    return relationships
# ─── Main ────────────────────────────────────────────────────────────────────
def main():
    """CLI entry point: classify, dedupe and score the knowledge graph files."""
    parser = argparse.ArgumentParser(description="Knowledge graph cleanup")
    parser.add_argument("--classify", action="store_true", help="Classify unknown entities")
    parser.add_argument("--dedupe", action="store_true", help="Deduplicate entities")
    parser.add_argument("--score", action="store_true", help="Score relationships")
    parser.add_argument("--dry-run", action="store_true", help="Show changes without writing")
    args = parser.parse_args()

    # When no step flag is given at all, every step runs.
    any_step_requested = args.classify or args.dedupe or args.score
    run_all = not any_step_requested

    entities = load_entities()
    relationships = load_relationships()
    log.info(f"Loaded {len(entities)} entities, {len(relationships)} relationships")

    if args.dry_run:
        log.info("═══ DRY RUN — no files will be modified ═══")
    else:
        # Snapshot both files before touching anything.
        backup(ENTITIES_PATH)
        backup(RELATIONSHIPS_PATH)

    if run_all or args.classify:
        log.info("─── Step 1: Classify Unknowns ───")
        entities = classify_unknowns(entities, dry_run=args.dry_run)
    if run_all or args.dedupe:
        log.info("─── Step 2: Deduplicate Entities ───")
        entities, relationships = deduplicate(entities, relationships, dry_run=args.dry_run)
    if run_all or args.score:
        log.info("─── Step 3: Score Relationships ───")
        relationships = score_relationships(relationships, dry_run=args.dry_run)

    if args.dry_run:
        log.info(f"Dry run complete. Would result in: {len(entities)} entities, {len(relationships)} relationships")
    else:
        atomic_write(ENTITIES_PATH, entities)
        atomic_write(RELATIONSHIPS_PATH, relationships)
        log.info(f"Done. Final: {len(entities)} entities, {len(relationships)} relationships")
if __name__ == "__main__":
    # Configure root logging once, then hand off to the CLI.
    logging.basicConfig(
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
        level=logging.INFO,
    )
    main()

View file

@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
LLM-Powered Entity Extractor — Uses Ollama for Named Entity Recognition.
Standalone module. No pip dependencies beyond stdlib.
Calls Ollama HTTP API with structured NER prompts.
Configuration via environment variables:
    DARKPLEX_OLLAMA_URL      Ollama base URL (default: http://localhost:11434)
    DARKPLEX_OLLAMA_MODEL    Model name (default: llama3.2:1b)
    DARKPLEX_OLLAMA_TIMEOUT  Timeout in seconds (default: 30)
    DARKPLEX_EXTRACTOR       llm|regex|auto (default: auto)
"""
import json
import logging
import os
import urllib.request
import urllib.error

log = logging.getLogger("llm-extractor")

# Ollama connection settings, each overridable via environment.
OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "llama3.2:1b")
OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30"))

# Entity types accepted as-is; anything else is mapped through the alias
# table in _normalize_entities or defaults to "concept".
VALID_TYPES = {"person", "organization", "company", "project", "technology",
               "location", "event", "concept", "product"}

# Single-text NER prompt; {text} is substituted at call time.
NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object.
Each key is the entity name (lowercase), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words (the, system, message, etc.)
- Entity names should be lowercase, use hyphens for multi-word
- "context" is a 2-5 word description of the entity's role in the text
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no explanation
Text:
{text}
JSON:"""

# Multi-text NER prompt; {texts} is a numbered list of inputs.
BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object.
Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words
- "context" is a 2-5 word description
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no markdown, no explanation
Texts:
{texts}
JSON:"""
def _call_ollama(prompt: str) -> str | None:
    """POST *prompt* to the Ollama generate endpoint.

    Returns the model's response text, or None on any failure so the
    caller can fall back to regex extraction.
    """
    body = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 1024},
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=OLLAMA_TIMEOUT) as resp:
            parsed = json.loads(resp.read().decode())
            return parsed.get("response", "")
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        log.warning(f"Ollama call failed: {e}")
        return None
    except Exception as e:
        log.warning(f"Ollama unexpected error: {e}")
        return None
def _parse_json_response(text: str) -> dict:
    """Pull a JSON object out of an LLM reply.

    Tolerates markdown code fences and surrounding prose: locates the
    first balanced {...} span and parses it. Returns {} on any failure.
    """
    if not text:
        return {}
    cleaned = text.strip()
    # Drop markdown fence lines (```json ... ```), keeping the payload.
    if cleaned.startswith("```"):
        kept = [ln for ln in cleaned.split("\n") if not ln.strip().startswith("```")]
        cleaned = "\n".join(kept)
    opening = cleaned.find("{")
    if opening == -1:
        return {}
    # Walk forward tracking brace depth until the opener is balanced.
    depth = 0
    for pos in range(opening, len(cleaned)):
        ch = cleaned[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                try:
                    return json.loads(cleaned[opening:pos + 1])
                except json.JSONDecodeError:
                    return {}
    return {}
def _normalize_entities(raw: dict) -> dict:
    """Normalize and validate extracted entities.

    Lowercases/hyphenates names, drops malformed or out-of-range entries,
    maps unknown type labels through a small alias table (falling back to
    "concept"), and truncates context strings to 100 chars.

    Returns:
        dict of {name: {"type": ..., "context": ..., "match": "llm"}}.
    """
    # Alias table hoisted out of the loop — the original rebuilt this dict
    # for every entity with an unrecognized type.
    type_aliases = {"org": "organization", "tech": "technology", "loc": "location",
                    "place": "location", "tool": "technology", "framework": "technology",
                    "language": "technology", "app": "product", "software": "product",
                    "service": "product", "group": "organization", "team": "organization"}
    result = {}
    for name, info in raw.items():
        if not isinstance(info, dict):
            continue
        name = name.strip().lower().replace("_", "-").replace(" ", "-")
        if len(name) < 2 or len(name) > 80:
            continue
        etype = info.get("type", "unknown").lower().strip()
        if etype not in VALID_TYPES:
            etype = type_aliases.get(etype, "concept")
        context = info.get("context", "")
        # Only string contexts are kept; anything else becomes "".
        context = context[:100] if isinstance(context, str) else ""
        result[name] = {"type": etype, "context": context, "match": "llm"}
    return result
def extract_entities_llm(text: str) -> dict[str, dict] | None:
    """Extract entities from *text* via the Ollama LLM.

    Returns {name: {type, context, match}} on success, {} for trivial
    input, or None when Ollama is unreachable — None tells the caller
    to fall back to regex extraction.
    """
    if not text or len(text) < 10:
        return {}
    # Cap prompt size so small local models stay fast.
    response = _call_ollama(NER_PROMPT.format(text=text[:2000]))
    if response is None:
        return None  # Signal fallback
    return _normalize_entities(_parse_json_response(response))
def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None:
    """Extract entities from several texts with a single LLM call.

    Texts under 10 chars are dropped, each kept text is capped at 500
    chars, and at most 10 texts go into one prompt. Returns the combined
    entity dict, {} when nothing qualifies, or None when the LLM is
    unavailable.
    """
    if not texts:
        return {}
    usable = [t[:500] for t in texts if t and len(t) >= 10]
    if not usable:
        return {}
    # Limit batch size to keep the prompt reasonable.
    usable = usable[:10]
    listing = "\n".join(f"[{i+1}] {t}" for i, t in enumerate(usable))
    response = _call_ollama(BATCH_PROMPT.format(texts=listing))
    if response is None:
        return None
    return _normalize_entities(_parse_json_response(response))
def is_available() -> bool:
    """Return True when the Ollama HTTP API answers /api/tags."""
    probe = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET")
    try:
        with urllib.request.urlopen(probe, timeout=3) as resp:
            return resp.status == 200
    except Exception:
        # Any network/HTTP failure means "not available".
        return False

830
cortex/intelligence/loop.py Normal file
View file

@ -0,0 +1,830 @@
#!/usr/bin/env python3
"""
Darkplex Loop The single heartbeat of the intelligence pipeline.
One process. One loop. One state machine.
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
Each cycle:
1. INGEST Fetch new events from NATS (batch consumer pull)
2. EXTRACT Pull entities and relationships from events
3. BRIDGE Sync cortex outputs to knowledge engine
4. VERIFY Check that real output was produced
5. REPORT Update state, alert on failure
States:
RUNNING Everything nominal
DEGRADED A step failed, but loop continues with recovery attempts
EMERGENCY Critical failure, alerting
Usage:
darkplex loop # Run loop (default: 1h cycle)
darkplex loop --once # Single cycle, then exit
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
darkplex loop --status # Print current state and exit
darkplex loop --check # Check for new events, exit 0=new 1=none
"""
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
import urllib.request
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
# ── Paths (configurable via env) ─────────────────────────────────────────────
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
SCRIPT_DIR = BASE_DIR / "scripts"
LEVEL4_DIR = SCRIPT_DIR / "level4"
LOG_DIR = BASE_DIR / "logs"
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"
# Knowledge-graph output files shared with the entity manager.
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
# NATS stream/consumer used by the ingest step.
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))
DEFAULT_CYCLE_SECONDS = 3600  # 1 hour
ALERT_COOLDOWN = 3600  # 1 alert per hour max
log = logging.getLogger("darkplex-loop")
# ── State Machine ────────────────────────────────────────────────────────────
class LoopState:
    """Persistent state for the Darkplex Loop.

    Every public attribute is serialized verbatim to STATE_FILE by save()
    and restored best-effort on construction.
    """
    def __init__(self):
        self.status = "INIT"          # INIT | RUNNING | DEGRADED | EMERGENCY
        self.cycle_count = 0
        self.last_cycle = None        # ISO timestamp of the last cycle (success or failure)
        self.last_success = None
        self.last_failure = None
        self.last_alert = None        # ISO timestamp of the last alert sent
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}               # per-step results from the last successful cycle
        self.error = None             # "step: message" from the last failure
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self.quality_metrics = {}  # {unknown_rate, llm_success_rate, avg_entities_per_event}
        self.quality_history = []  # last 10: [{cycle, unknown_rate, llm_success_rate}]
        self.ollama_status = "unknown"  # healthy|degraded|down
        self._load()

    def _load(self):
        # Restore previously-saved attributes; keys we don't know are ignored.
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                if hasattr(self, k):
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            pass  # first run or corrupt state file — start fresh

    def save(self):
        # Persist the full attribute dict as pretty-printed JSON.
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle."""
        self.perf = perf
        # Include unknown_rate in perf_history if available
        # (note: this mutates the caller's dict).
        if self.quality_metrics:
            perf["unknown_rate"] = self.quality_metrics.get("unknown_rate", 0)
        self.perf_history.append(perf)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over last 10 cycles."""
        if not self.perf_history:
            return {}
        # NOTE(review): keys come from the oldest entry, so metrics that
        # only appear in later entries are not averaged until they roll in.
        keys = self.perf_history[0].keys()
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / len(self.perf_history)) for k in keys}

    def record_success(self, step_results: dict):
        # A success resets the failure streak and stamps both cycle markers.
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        # Escalate to EMERGENCY after 3 consecutive failures.
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """True when no alert was sent within the last ALERT_COOLDOWN seconds."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            return True  # unparsable timestamp — err on the side of alerting

    def mark_alerted(self):
        # Stamp the cooldown clock.
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
# ── Pipeline Steps ───────────────────────────────────────────────────────────
def _nats_cmd():
    """Base NATS CLI command, honoring NATS_BIN and NATS_URL env vars."""
    binary = os.environ.get("NATS_BIN", "nats")
    url = os.environ.get("NATS_URL", "")
    return [binary, "-s", url] if url else [binary]
def check_new_events() -> int:
    """Number of pending events on the loop's NATS consumer, or -1 on error."""
    cmd = _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return -1
        return json.loads(proc.stdout).get("num_pending", 0)
    except Exception as e:
        # CLI missing, timeout, or bad JSON — all map to "unknown".
        log.warning(f"check_new_events failed: {e}")
        return -1
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns {"events": [...], "total_scanned": int, "skipped": int}; when
    the consumer has nothing pending a "skip_reason" key is added so
    run_cycle() can short-circuit. Falls back to _step_ingest_sequential()
    when the batch pull fails or times out.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    # Check how many pending
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")
    events = []
    total_fetched = 0
    parse_errors = 0
    # Fetch in batches; pending == -1 (check failed) still tries a full batch.
    remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        batch_size = min(remaining, NATS_BATCH_SIZE)
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)
        # --raw output: one JSON payload per line.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1
    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)
    # Update sequence tracking (get current stream seq from consumer info)
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        log.warning("Could not save last processed sequence")
    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Replays the stream from the last processed sequence (bounded to the
    most recent NATS_BATCH_SIZE messages), keeping only
    conversation_message_in events. Payloads arrive base64-encoded inside
    the stream-get JSON envelope.
    """
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        pass  # missing/corrupt checkpoint — replay from the window start
    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}
    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    # Never scan more than one batch worth of backlog.
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)
    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1  # any per-message failure just skips that sequence
    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass  # best-effort checkpoint
    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Dynamically loads entity-manager.py from LEVEL4_DIR (its file name has
    a hyphen, so importlib is used), prefers LLM (Ollama) extraction with
    regex fallback, writes the updated entity/relationship files, and
    records totals on *state*.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")
    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)
    # Try LLM batch extraction first
    from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")
    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    # Prepare texts for potential batch LLM processing
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        # Payload text may be a list of parts (dicts or strings) — flatten it.
        if isinstance(text, list):
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))
    # LLM batch extraction for qualifying texts (cap at 50 to keep cycle time reasonable)
    llm_results = {}
    if llm_ok:
        batch_texts = [t for t, s in sorted(
            [(t, s) for t, s in event_texts if t and s >= 0.4],
            key=lambda x: -x[1]  # highest importance first
        )][:50]
        if batch_texts:
            consecutive_fails = 0
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
            if llm_results:
                log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")
    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        # Only texts that cleared the importance threshold are processed.
        if not text or score < 0.4:
            continue
        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Add LLM entities that appear in this text
            text_lower = text.lower()
            for name, info in llm_results.items():
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                known[name] = entities[name]
        if len(names) >= 2:
            # Record co-occurrence pairs within a window of 4 neighbors
            # to bound the O(n²) pairing cost.
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1
    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)
    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
def step_bridge(state: LoopState) -> dict:
    """Step 3: invoke knowledge-bridge.py to sync cortex outputs.

    Returns a status dict: skipped (script absent), failed (non-zero
    exit), or ok with the item count parsed from the bridge's stdout.
    """
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")
    script = SCRIPT_DIR / "knowledge-bridge.py"
    if not script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}
    proc = subprocess.run(
        [sys.executable, str(script), "sync"],
        capture_output=True, text=True, timeout=120,
    )
    if proc.returncode != 0:
        log.warning(f"BRIDGE: Failed — {proc.stderr[:200]}")
        return {"status": "failed", "error": proc.stderr[:200]}
    # Sum every "<N> new/bridged/added" figure the bridge printed.
    bridged = sum(
        int(match.group(1))
        for line in proc.stdout.split("\n")
        if (match := re.search(r"(\d+)\s+(?:new|bridged|added)", line, re.I))
    )
    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}
def _check_quality(state: LoopState, extract_result: dict) -> list:
    """Check entity quality metrics. Returns list of issues/warnings.

    Updates state.quality_metrics and appends to state.quality_history
    (last 10 cycles). Flags an unknown-type rate above 30% and a
    three-cycle rising trend.
    """
    issues = []
    # Load entities and compute unknown_rate
    try:
        entities = json.loads(ENTITIES_FILE.read_text()) if ENTITIES_FILE.exists() else {}
    except (json.JSONDecodeError, OSError):
        entities = {}
    total = len(entities)
    unknown_count = sum(1 for e in entities.values() if e.get("type") == "unknown")
    unknown_rate = (unknown_count / total * 100) if total > 0 else 0.0
    events_processed = state.events_processed_last or 1
    extracted = extract_result.get("extracted", 0)
    avg_entities_per_event = extracted / events_processed if events_processed > 0 else 0.0
    # Estimate LLM success rate from extraction (if LLM was used, new_entities > 0 is a proxy)
    llm_success_rate = 100.0  # default if no LLM used
    # We track this per-cycle based on whether extraction produced results
    if events_processed > 10 and extracted == 0:
        llm_success_rate = 0.0
    state.quality_metrics = {
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
        "avg_entities_per_event": round(avg_entities_per_event, 2),
    }
    if unknown_rate > 30:
        issues.append(f"High unknown entity rate: {unknown_rate:.1f}% ({unknown_count}/{total})")
    # Track quality history and detect trends
    state.quality_history.append({
        "cycle": state.cycle_count + 1,
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
    })
    state.quality_history = state.quality_history[-10:]  # keep last 10
    # Check if unknown_rate rising 3 cycles in a row
    if len(state.quality_history) >= 3:
        last3 = [h["unknown_rate"] for h in state.quality_history[-3:]]
        if last3[0] < last3[1] < last3[2]:
            issues.append(f"Entity quality degrading — unknown_rate rising: {last3}")
    log.info(f"VERIFY/QUALITY: unknown_rate={unknown_rate:.1f}%, avg_entities/event={avg_entities_per_event:.2f}")
    return issues
def _check_ollama(state: LoopState) -> list:
    """Check Ollama health via /api/tags. Returns list of issues.

    Sets state.ollama_status to healthy/degraded/down; "degraded" means
    the API answers but the configured model is not loaded.
    """
    issues = []
    model = os.environ.get("DARKPLEX_OLLAMA_MODEL", os.environ.get("OLLAMA_MODEL", ""))
    # Honor DARKPLEX_OLLAMA_URL like llm_extractor does (was hardcoded to localhost).
    base_url = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
    try:
        req = urllib.request.Request(f"{base_url}/api/tags", method="GET")
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = json.loads(resp.read())
            models = [m.get("name", "") for m in data.get("models", [])]
            if model and not any(model in m for m in models):
                state.ollama_status = "degraded"
                issues.append(f"Ollama up but model '{model}' not loaded (available: {models[:5]})")
                log.warning(f"VERIFY/OLLAMA: degraded — model '{model}' not in {models[:5]}")
            else:
                state.ollama_status = "healthy"
                log.info(f"VERIFY/OLLAMA: healthy ({len(models)} models)")
    except Exception as e:
        state.ollama_status = "down"
        issues.append(f"Ollama down: {e}")
        log.warning(f"VERIFY/OLLAMA: down — {e}")
    return issues
def _check_performance(state: LoopState) -> list:
    """Flag performance regressions against the rolling 10-cycle average."""
    issues = []
    # Need at least two cycles of history for a meaningful comparison.
    if len(state.perf_history) < 2:
        return issues
    current = state.perf
    rolling = state.perf_averages()
    curr_total = current.get("total_ms", 0)
    avg_total = rolling.get("total_ms", 0)
    # More than double the rolling average counts as a regression.
    if avg_total > 0 and curr_total > 2 * avg_total:
        issues.append(f"Performance regression detected: {curr_total}ms vs avg {avg_total}ms")
    # Extraction alone should stay under two minutes.
    extract_ms = current.get("extract_ms", 0)
    if extract_ms > 120000:
        issues.append(f"Extraction too slow: {extract_ms}ms (>2min)")
    if not issues:
        log.info(f"VERIFY/PERF: OK (total={curr_total}ms, avg={avg_total}ms)")
    else:
        for problem in issues:
            log.warning(f"VERIFY/PERF: {problem}")
    return issues
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: sanity-check pipeline outputs and infrastructure.

    Aggregates file-integrity, extraction-yield, NATS, quality, Ollama
    and performance checks into {"verdict": "PASS"|"FAIL", "issues": [...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")
    issues = []
    # Both knowledge files must exist, parse as JSON, and be non-empty.
    for path, label in ((ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")):
        if not path.exists():
            issues.append(f"{label} file missing")
            continue
        try:
            if not json.loads(path.read_text()):
                issues.append(f"{label} file is empty")
        except json.JSONDecodeError:
            issues.append(f"{label} file is corrupt JSON")
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")
    # NATS reachability.
    try:
        probe = subprocess.run(["nats", "stream", "ls", "--json"], capture_output=True, text=True, timeout=10)
        if probe.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")
    # Deeper monitoring: quality metrics, Ollama health, perf regressions.
    issues.extend(_check_quality(state, extract_result))
    issues.extend(_check_ollama(state))
    issues.extend(_check_performance(state))
    verdict = "PASS" if not issues else "FAIL"
    log.info(f"VERIFY: {verdict}{len(issues)} issues")
    for issue in issues:
        log.warning(f"{issue}")
    return {"verdict": verdict, "issues": issues}
def step_report(state: LoopState, verify_result: dict):
    """Step 5: fire an alert when the loop is DEGRADED or EMERGENCY.

    Rate-limited via state.can_alert(); also drops a flag file for
    external monitors and stamps the cooldown clock on state.
    """
    if state.status == "RUNNING":
        return
    if not state.can_alert():
        log.info("REPORT: Alert cooldown active, skipping")
        return
    severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
    alert_text = (
        f"Darkplex Loop {severity}\n"
        f"Consecutive failures: {state.consecutive_failures}\n"
        f"Error: {state.error}\n"
        f"Issues: {', '.join(verify_result.get('issues', []))}"
    )
    log.warning(f"REPORT: Sending alert — {state.status}")
    try:
        subprocess.run(
            ["python3", str(SCRIPT_DIR / "vera-alert.py"), alert_text],
            capture_output=True, text=True, timeout=15,
        )
    except Exception:
        pass  # best-effort: alerting must never crash the loop
    (LOG_DIR / "darkplex-loop-alert.flag").write_text(
        f"{datetime.now().isoformat()} {state.status}: {state.error}"
    )
    state.mark_alerted()
# ── Helpers ──────────────────────────────────────────────────────────────────
def _importance(text: str) -> float:
    """Heuristic importance score for an event text, clamped to [0, 1].

    Rewards length, capitalized words and business vocabulary; penalizes
    heartbeat/cron noise. Empty input scores 0.0.
    """
    if not text:
        return 0.0
    lowered = text.lower()
    score = 0.3
    # Longer texts are likelier to carry substance.
    for length_cutoff in (200, 500):
        if len(text) > length_cutoff:
            score += 0.1
    # Capitalized words hint at named entities.
    cap_words = len(re.findall(r"\b[A-Z][a-z]+\b", text))
    for cap_cutoff in (3, 8):
        if cap_words > cap_cutoff:
            score += 0.1
    # Operational noise: each matching pattern costs 0.3.
    for pattern in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
        if pattern.lower() in lowered:
            score -= 0.3
    # Business vocabulary: each keyword present adds 0.05.
    for keyword in ["meeting", "project", "company", "contract", "decision", "strategy",
                    "budget", "deadline", "milestone", "partnership", "investment", "revenue",
                    "client", "proposal", "agreement"]:
        if keyword in lowered:
            score += 0.05
    return max(0.0, min(1.0, score))
def print_status():
    """Print a human-readable summary of the loop state to stdout."""
    state = LoopState()

    def _count(path):
        # Best-effort: missing/corrupt files count as 0.
        try:
            return len(json.loads(path.read_text()))
        except Exception:
            return 0

    ent_count = _count(ENTITIES_FILE)
    rel_count = _count(RELATIONSHIPS_FILE)
    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "")
    summary = [
        f"{icon} Status: {state.status}",
        f"Cycles: {state.cycle_count}",
        f"Last cycle: {state.last_cycle or 'never'}",
        f"Last success: {state.last_success or 'never'}",
        f"Last failure: {state.last_failure or 'never'}",
        f"Failures: {state.consecutive_failures}",
        f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)",
        f"Relationships:{rel_count} total",
    ]
    for line in summary:
        print(line)
    if state.quality_metrics:
        qm = state.quality_metrics
        print(f"Quality: unknown_rate={qm.get('unknown_rate', '?')}% llm_success={qm.get('llm_success_rate', '?')}% avg_ent/event={qm.get('avg_entities_per_event', '?')}")
    print(f"Ollama: {state.ollama_status}")
    if state.perf:
        print(f"Last perf: {state.perf}")
    if state.error:
        print(f"Error: {state.error}")
# ── Main Loop ────────────────────────────────────────────────────────────────
def _ms_since(t0: float) -> int:
    """Whole milliseconds elapsed since monotonic timestamp *t0*."""
    elapsed = time.monotonic() - t0
    return int(elapsed * 1000)
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success.

    Order: ingest → extract → bridge → verify, timing each step into
    *perf*. A verify FAIL only counts as a cycle failure when its issues
    mention broken/missing/corrupt outputs; softer issues keep RUNNING.
    """
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()
    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}
        # Early skip if no new events
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True
        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract
        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge
        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Only hard integrity problems flip the cycle to failure.
        if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]):
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False
        state.record_success(step_results)
        avgs = state.perf_averages()
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status}{perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")
        # A good cycle clears any stale alert flag.
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()
        return True
    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Attribute the failure to the first step that produced no result.
        step_name = "unknown"
        for name in ["ingest", "extract", "bridge", "verify"]:
            if name not in step_results:
                step_name = name
                break
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
def main():
    """CLI entry point for `darkplex loop`.

    Flags: --status (print state and exit), --check (exit 0 if new events,
    1 if none, 2 on error), --once (single cycle), --cycle N (interval in
    seconds).
    """
    # Create the log directory BEFORE configuring logging: the original
    # called logging.basicConfig first, and logging.FileHandler raises
    # FileNotFoundError when LOG_DIR does not exist yet (first run).
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )
    args = sys.argv[1:]
    if "--status" in args:
        print_status()
        return
    if "--check" in args:
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)
    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS
    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])
    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")
    running = True

    def handle_signal(sig, frame):
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    while running:
        run_cycle(state)
        if once:
            break
        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1s slices so a shutdown signal is honored promptly.
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)
    log.info("Darkplex Loop stopped")

View file

@ -0,0 +1,152 @@
"""Cross-Agent Memory Bus: NATS pub/sub for agent insights.
Agents publish insights (observations, learned facts, warnings) to the bus.
Other agents subscribe to topics relevant to their function.
DATA ISOLATION: Only Vainplex-internal agents participate.
"""
from __future__ import annotations
import json
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Callable, Awaitable
logger = logging.getLogger(__name__)
NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222")
# Only these agents are allowed to participate in shared memory
ALLOWED_AGENTS: set[str] = set(
os.environ.get("INTELLIGENCE_ALLOWED_AGENTS", "claudia,vera,stella,viola").split(",")
)
INSIGHT_SUBJECT_PREFIX = "darkplex.intelligence.insights"
@dataclass
class Insight:
    """A single piece of knowledge one agent shares with the others.

    Carries the publishing agent, a routing topic, the insight text,
    a confidence score in [0, 1], free-form tags, an ISO-8601 timestamp
    (auto-filled at creation when left empty), and arbitrary metadata.
    """
    agent: str
    topic: str
    content: str
    confidence: float = 0.8  # 0.0-1.0
    tags: list[str] = field(default_factory=list)
    timestamp: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Stamp creation time (UTC) unless the caller supplied one.
        if not self.timestamp:
            self.timestamp = datetime.now(timezone.utc).isoformat()

    def to_json(self) -> str:
        """Serialize this insight to a JSON string."""
        payload = {
            "agent": self.agent,
            "topic": self.topic,
            "content": self.content,
            "confidence": self.confidence,
            "tags": self.tags,
            "timestamp": self.timestamp,
            "metadata": self.metadata,
        }
        return json.dumps(payload)

    @classmethod
    def from_json(cls, data: str) -> Insight:
        """Deserialize an insight from its JSON string form."""
        return cls(**json.loads(data))

InsightHandler = Callable[[Insight], Awaitable[None]]
class SharedMemory:
    """Cross-agent memory bus using NATS pub/sub.

    Usage:
        memory = SharedMemory(agent_name="claudia")
        await memory.connect()
        await memory.publish(Insight(agent="claudia", topic="infra", content="..."))
        await memory.subscribe("infra", handler)

    Enforces data isolation: only allowed agents can publish/subscribe.
    """

    def __init__(self, agent_name: str, nats_url: str | None = None) -> None:
        # Reject non-allowlisted agents up front — isolation is enforced at
        # construction, publish, and receive time.
        if agent_name not in ALLOWED_AGENTS:
            raise ValueError(
                f"Agent '{agent_name}' is not allowed in shared memory. "
                f"Allowed: {ALLOWED_AGENTS}"
            )
        self.agent_name = agent_name
        self.nats_url = nats_url or NATS_URL
        self._nats_client: Any = None          # live NATS connection (set by connect())
        self._subscriptions: list[Any] = []    # active subscription handles

    async def connect(self) -> None:
        """Connect to the NATS server.

        Raises whatever `nats.connect` raises after logging the failure.
        """
        try:
            import nats
            self._nats_client = await nats.connect(self.nats_url)
            logger.info("SharedMemory connected for agent '%s'", self.agent_name)
        except Exception:
            logger.exception("Failed to connect SharedMemory to NATS")
            raise

    async def publish(self, insight: Insight) -> None:
        """Publish an insight to the memory bus.

        Args:
            insight: The insight to share. Agent field must match this instance's agent.

        Raises:
            RuntimeError: if connect() has not been called.
            ValueError: if the insight's agent is not allowlisted.
        """
        if not self._nats_client:
            raise RuntimeError("Not connected. Call connect() first.")
        if insight.agent not in ALLOWED_AGENTS:
            raise ValueError(f"Agent '{insight.agent}' not allowed to publish insights")
        # Route by topic: darkplex.intelligence.insights.<topic>
        subject = f"{INSIGHT_SUBJECT_PREFIX}.{insight.topic}"
        await self._nats_client.publish(subject, insight.to_json().encode())
        logger.debug(
            "Published insight: %s/%s by %s", insight.topic, insight.content[:50], insight.agent
        )

    async def subscribe(self, topic: str, handler: InsightHandler) -> None:
        """Subscribe to insights on a topic.

        Args:
            topic: Topic to subscribe to (supports NATS wildcards).
            handler: Async callback for received insights.
        """
        if not self._nats_client:
            raise RuntimeError("Not connected. Call connect() first.")
        subject = f"{INSIGHT_SUBJECT_PREFIX}.{topic}"

        async def _message_handler(msg: Any) -> None:
            # Validate the sender before dispatching; never let a handler
            # exception kill the subscription callback.
            try:
                insight = Insight.from_json(msg.data.decode())
                if insight.agent not in ALLOWED_AGENTS:
                    logger.warning(
                        "Ignoring insight from non-allowed agent: %s", insight.agent
                    )
                    return
                await handler(insight)
            except Exception:
                logger.exception("Error handling insight message")

        sub = await self._nats_client.subscribe(subject, cb=_message_handler)
        self._subscriptions.append(sub)
        logger.info("Subscribed to insights: %s", subject)

    async def close(self) -> None:
        """Unsubscribe and disconnect."""
        for sub in self._subscriptions:
            await sub.unsubscribe()
        self._subscriptions.clear()
        if self._nats_client:
            await self._nats_client.close()
            self._nats_client = None

View file

@ -0,0 +1,193 @@
"""Temporal Context API: chronological knowledge retrieval.
Queries NATS events and ChromaDB with a time dimension to answer:
"What do we know about X, chronologically?"
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
logger = logging.getLogger(__name__)
# Default config from environment
NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222")
CHROMADB_URL = os.environ.get("CHROMADB_URL", "http://localhost:8000")
@dataclass
class TemporalEntry:
    """A knowledge entry with temporal metadata."""
    timestamp: datetime                 # when the knowledge was recorded
    source: str  # "nats" or "chromadb"
    topic: str                          # query topic this entry matched
    content: str                        # raw event/document text
    metadata: dict[str, Any] = field(default_factory=dict)
    relevance_score: float = 0.0        # raw ChromaDB distance; 0.0 for NATS entries
@dataclass
class TemporalQuery:
    """Query parameters for temporal context retrieval."""
    topic: str                          # subject/semantic topic to look up
    start_time: datetime | None = None  # inclusive lower bound (None = unbounded)
    end_time: datetime | None = None    # inclusive upper bound (None = unbounded)
    limit: int = 50                     # max entries returned (applied per source and after merge)
    sources: list[str] = field(default_factory=lambda: ["nats", "chromadb"])
class TemporalContext:
    """Retrieves chronological knowledge from NATS events and ChromaDB.

    Usage:
        ctx = TemporalContext()
        entries = await ctx.query(TemporalQuery(topic="ssl-cert"))
    """

    def __init__(
        self,
        nats_url: str | None = None,
        chromadb_url: str | None = None,
    ) -> None:
        self.nats_url = nats_url or NATS_URL
        self.chromadb_url = chromadb_url or CHROMADB_URL
        self._nats_client: Any = None    # set by connect()
        self._chroma_client: Any = None  # set by connect()

    async def connect(self) -> None:
        """Initialize connections to NATS and ChromaDB.

        Each backend is attempted independently; a failure is logged and
        that source is simply skipped by query() (its client stays None).
        """
        try:
            import nats
            self._nats_client = await nats.connect(self.nats_url)
            logger.info("Connected to NATS: %s", self.nats_url)
        except Exception:
            logger.exception("Failed to connect to NATS")
        try:
            import chromadb
            self._chroma_client = chromadb.HttpClient(host=self.chromadb_url)
            logger.info("Connected to ChromaDB: %s", self.chromadb_url)
        except Exception:
            logger.exception("Failed to connect to ChromaDB")

    async def query(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query temporal context across configured sources.

        Returns entries sorted chronologically (oldest first).
        """
        entries: list[TemporalEntry] = []
        if "nats" in query.sources and self._nats_client:
            nats_entries = await self._query_nats(query)
            entries.extend(nats_entries)
        if "chromadb" in query.sources and self._chroma_client:
            chroma_entries = self._query_chromadb(query)
            entries.extend(chroma_entries)
        # Sort chronologically
        entries.sort(key=lambda e: e.timestamp)
        # Apply limit
        if query.limit:
            entries = entries[:query.limit]
        return entries

    async def _query_nats(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query NATS JetStream for historical events matching the topic."""
        entries: list[TemporalEntry] = []
        try:
            js = self._nats_client.jetstream()
            subject = f"darkplex.*.{query.topic}.>"
            # Get messages from the stream
            sub = await js.subscribe(subject, ordered_consumer=True)
            count = 0
            async for msg in sub.messages:
                if count >= query.limit:
                    break
                # NOTE(review): assumes a numeric "Nats-Time-Stamp" header in
                # epoch seconds; messages without it collapse to 1970 and are
                # then filtered by start_time — confirm header semantics.
                timestamp = datetime.fromtimestamp(
                    msg.headers.get("Nats-Time-Stamp", 0) if msg.headers else 0,
                    tz=timezone.utc,
                )
                if query.start_time and timestamp < query.start_time:
                    continue
                if query.end_time and timestamp > query.end_time:
                    continue
                entries.append(TemporalEntry(
                    timestamp=timestamp,
                    source="nats",
                    topic=query.topic,
                    content=msg.data.decode() if msg.data else "",
                    metadata={"subject": msg.subject},
                ))
                count += 1
        except Exception:
            logger.exception("NATS temporal query failed for topic: %s", query.topic)
        return entries

    def _query_chromadb(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query ChromaDB for semantically relevant entries with time filtering."""
        entries: list[TemporalEntry] = []
        try:
            collection = self._chroma_client.get_or_create_collection("darkplex_knowledge")
            # Build an optional metadata filter on the ISO-format timestamp field.
            where_filter: dict[str, Any] = {}
            if query.start_time:
                where_filter["timestamp"] = {"$gte": query.start_time.isoformat()}
            if query.end_time:
                if "timestamp" in where_filter:
                    # Both bounds present: combine via an explicit $and clause.
                    where_filter = {
                        "$and": [
                            {"timestamp": {"$gte": query.start_time.isoformat()}},
                            {"timestamp": {"$lte": query.end_time.isoformat()}},
                        ]
                    }
                else:
                    where_filter["timestamp"] = {"$lte": query.end_time.isoformat()}
            results = collection.query(
                query_texts=[query.topic],
                n_results=query.limit,
                where=where_filter if where_filter else None,
            )
            if results and results.get("documents"):
                for i, doc in enumerate(results["documents"][0]):
                    meta = results["metadatas"][0][i] if results.get("metadatas") else {}
                    ts_str = meta.get("timestamp", "")
                    try:
                        ts = datetime.fromisoformat(ts_str)
                    except (ValueError, TypeError):
                        # Missing/invalid timestamp metadata: fall back to "now".
                        ts = datetime.now(timezone.utc)
                    entries.append(TemporalEntry(
                        timestamp=ts,
                        source="chromadb",
                        topic=query.topic,
                        content=doc,
                        metadata=meta,
                        # NOTE(review): stores the raw distance (smaller = closer),
                        # not a similarity score — confirm this is intended.
                        relevance_score=results["distances"][0][i] if results.get("distances") else 0.0,
                    ))
        except Exception:
            logger.exception("ChromaDB temporal query failed for topic: %s", query.topic)
        return entries

    async def close(self) -> None:
        """Close connections."""
        if self._nats_client:
            await self._nats_client.close()

345
cortex/knowledge_extractor.py Executable file
View file

@ -0,0 +1,345 @@
#!/usr/bin/env python3
"""
Smart Extractor Extract entities from NATS events and update knowledge graph.
Part of Level 4.4 AGI Roadmap.

Usage:
    knowledge_extractor.py --last 100     Process last N events
    knowledge_extractor.py --since 6h     Process events from last 6 hours
    knowledge_extractor.py --dry-run      Show what would be extracted without saving
"""
import sys
import os
import json
import subprocess
import re
import time
import logging
from pathlib import Path
from datetime import datetime

# Make sibling modules (entity_manager.py) importable by path.
sys.path.insert(0, str(Path(__file__).parent))
from importlib import import_module  # NOTE(review): appears unused — verify before removing

SCRIPT_DIR = Path(__file__).parent
LOG_DIR = Path.home() / "clawd" / "logs"
LOG_FILE = LOG_DIR / "entity-extraction.log"
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
NATS_STREAM = "openclaw-events"
CONSUMER_NAME = "kg-extractor-temp"

# Setup logging (directory must exist before FileHandler opens the file)
LOG_DIR.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler(),
    ],
)
log = logging.getLogger("smart-extractor")
def load_json(path):
    """Read a JSON file, returning {} when missing or unparseable."""
    try:
        with open(path) as fh:
            return json.load(fh)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}
def save_json(path, data):
    """Write *data* as pretty-printed JSON, creating parent directories."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    with open(path, "w") as fh:
        fh.write(serialized)
def importance_heuristic(text):
    """Simple importance scoring (0-1) based on content heuristics."""
    if not text:
        return 0.0
    lowered = text.lower()
    score = 0.3  # base
    # Longer messages tend to carry more substance.
    if len(text) > 200:
        score += 0.1
    if len(text) > 500:
        score += 0.1
    # Capitalized words are a cheap proxy for named entities.
    cap_words = re.findall(r"\b[A-Z][a-z]+\b", text)
    if len(cap_words) > 3:
        score += 0.1
    if len(cap_words) > 8:
        score += 0.1
    # Heartbeat/cron chatter counts as noise.
    for noise in ("HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"):
        if noise.lower() in lowered:
            score -= 0.3
    # Business/project vocabulary nudges the score upward.
    for word in ("meeting", "project", "company", "contract", "decision",
                 "strategy", "budget", "deadline", "milestone", "partnership",
                 "investment", "revenue", "client", "proposal", "agreement"):
        if word in lowered:
            score += 0.05
    return max(0.0, min(1.0, score))
def fetch_events_nats(last=None, since=None):
    """Fetch conversation events from the NATS stream via the `nats` CLI.

    Samples the stream's sequence range with `nats stream get`, keeping only
    `conversation_message_in` payloads that decode as JSON dicts.

    Args:
        last: Approximate number of recent events to sample (default 500).
        since: Duration string ("6h", "1d", "30m"); estimates a start
            sequence from the stream's message rate and also time-filters
            the decoded events.

    Returns:
        List of decoded event payload dicts (possibly empty on any error).
    """
    # FIX: hoisted out of the per-message loop (was re-imported every
    # iteration); also dropped an unused local subject filter.
    import base64

    events = []
    try:
        # Get stream info for the available sequence range.
        info_result = subprocess.run(
            ["nats", "stream", "info", NATS_STREAM, "--json"],
            capture_output=True, text=True, timeout=10
        )
        if info_result.returncode != 0:
            log.error("Failed to get stream info")
            return events
        info = json.loads(info_result.stdout)
        end_seq = info["state"]["last_seq"]
        start_seq = info["state"]["first_seq"]
        # Calculate range
        count = last or 500
        if since:
            # Estimate the start sequence from the stream's observed
            # message rate (messages per millisecond since first_ts).
            ms_since = parse_since(since) * 1000
            total_ms = (time.time() * 1000) - (datetime.fromisoformat(info["state"]["first_ts"].replace("Z", "+00:00")).timestamp() * 1000)
            total_msgs = end_seq - start_seq
            msgs_per_ms = total_msgs / total_ms if total_ms > 0 else 1
            fetch_start = max(start_seq, int(end_seq - ms_since * msgs_per_ms * 1.2))
        else:
            fetch_start = max(start_seq, end_seq - count)
        log.info(f"Fetching sequences {fetch_start} - {end_seq}")
        # Sample at most ~count messages evenly across the range.
        step = max(1, (end_seq - fetch_start) // count)
        for seq in range(fetch_start, end_seq + 1, step):
            try:
                result = subprocess.run(
                    ["nats", "stream", "get", NATS_STREAM, str(seq), "--json"],
                    capture_output=True, text=True, timeout=5
                )
                if result.returncode != 0:
                    continue
                msg = json.loads(result.stdout)
                subj = msg.get("subject", "")
                # Only conversation messages carry extractable text.
                if "conversation_message_in" not in subj:
                    continue
                # Input validation: max size check (1MB)
                raw_data = msg.get("data", "")
                if len(raw_data) > 1_048_576:
                    log.warning("Skipping oversized message at seq %d (%d bytes)", seq, len(raw_data))
                    continue
                try:
                    decoded = base64.b64decode(raw_data)
                except Exception as e:
                    log.warning("Invalid base64 at seq %d: %s", seq, e)
                    continue
                try:
                    data = json.loads(decoded.decode("utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError) as e:
                    log.warning("Invalid JSON at seq %d: %s", seq, e)
                    continue
                if not isinstance(data, dict):
                    log.warning("Expected dict at seq %d, got %s", seq, type(data).__name__)
                    continue
                events.append(data)
            except Exception:
                continue
        log.info(f"Fetched {len(events)} conversation events")
    except subprocess.TimeoutExpired:
        log.warning("NATS command timed out")
    except FileNotFoundError:
        log.warning("nats CLI not found — skipping NATS extraction")
    # Filter by time if --since specified (event timestamps are epoch ms).
    if since and events:
        cutoff = parse_since(since)
        if cutoff:
            events = [e for e in events if e.get("timestamp", 0) / 1000 >= cutoff]
    return events
def parse_since(since_str):
    """Parse a duration like '6h', '1d', '30m' into an epoch timestamp.

    Returns the timestamp that far in the past, or None when the string
    does not start with <digits><h|d|m>.
    """
    match = re.match(r"(\d+)([hdm])", since_str)
    if match is None:
        return None
    amount = int(match.group(1))
    unit_seconds = {"h": 3600, "d": 86400, "m": 60}[match.group(2)]
    return time.time() - amount * unit_seconds
def extract_from_event(event, known_entities):
    """Extract entities from a single event.

    Returns (found_entities_dict, importance_score); extraction is skipped
    entirely when the score falls below 0.4.
    """
    em = sys.modules.get("entity_manager_mod")
    if not em:
        # Lazily load the sibling entity_manager.py under a private name.
        import importlib.util
        module_path = Path(__file__).parent / "entity_manager.py"
        spec = importlib.util.spec_from_file_location("entity_manager_mod", module_path)
        em = importlib.util.module_from_spec(spec)
        sys.modules["entity_manager_mod"] = em
        spec.loader.exec_module(em)
    payload = event.get("payload", {})
    text = payload.get("text_preview", "") or payload.get("text", "")
    # Normalize the preview into a single string.
    if isinstance(text, list):
        text = " ".join(str(part) for part in text)
    elif not isinstance(text, str):
        text = str(text)
    if not text:
        return {}, 0.0
    score = importance_heuristic(text)
    if score < 0.4:
        # Below the importance threshold: skip the expensive extraction.
        return {}, score
    return em.extract_entities(text, known_entities), score
def run_extraction(last=None, since=None, dry_run=False):
    """Main extraction pipeline.

    Loads the entity_manager module by path, fetches conversation events
    from NATS, extracts entities per event, records co-occurrence
    relationships between entities seen in the same message, and persists
    both stores (skipped when dry_run).

    Args:
        last: Process approximately the last N events.
        since: Duration string ("6h", "1d") limiting how far back to look.
        dry_run: When True, nothing is written to disk.
    """
    log.info(f"Starting extraction (last={last}, since={since}, dry_run={dry_run})")
    # Load known entities
    spec_path = Path(__file__).parent / "entity_manager.py"
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager_mod", spec_path)
    em = importlib.util.module_from_spec(spec)
    sys.modules["entity_manager_mod"] = em
    spec.loader.exec_module(em)
    known = em.load_known_entities()
    log.info(f"Loaded {len(known)} known entities")
    # Fetch events
    events = fetch_events_nats(last=last, since=since)
    log.info(f"Fetched {len(events)} events from NATS")
    if not events:
        log.info("No events to process")
        return
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    # One timestamp for the whole run keeps first_seen/last_seen consistent.
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    for event in events:
        found, score = extract_from_event(event, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        # Add new entities
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "nats-extraction",
                    "first_seen": ts_now,
                }
                new_entities += 1
                known[name] = entities[name]
        # Create co-occurrence relationships between entities found in same message
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):  # limit pairs
                    # Canonical key orders the pair alphabetically: "a::b".
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b,
                            "types": ["co-occurrence"],
                            "count": 1,
                            "first_seen": ts_now,
                            "last_seen": ts_now,
                        }
                        new_relationships += 1
        if not dry_run and total_extracted % 50 == 0 and total_extracted > 0:
            # Periodic save
            em.save_json(ENTITIES_FILE, entities)
            em.save_json(RELATIONSHIPS_FILE, relationships)
    if not dry_run:
        em.save_json(ENTITIES_FILE, entities)
        em.save_json(RELATIONSHIPS_FILE, relationships)
    log.info(
        f"Done: {len(events)} events processed, {total_extracted} entities extracted, "
        f"{new_entities} new entities, {new_relationships} new relationships"
    )
    print(
        f"\nResults: {len(events)} events → {total_extracted} entities extracted, "
        f"{new_entities} new, {new_relationships} new relationships"
    )
def main():
    """Parse CLI flags (--last N, --since DUR, --dry-run) and run extraction."""
    last = None
    since = None
    dry_run = False
    args = sys.argv[1:]
    idx = 0
    while idx < len(args):
        flag = args[idx]
        if flag == "--last" and idx + 1 < len(args):
            last = int(args[idx + 1])
            idx += 2
        elif flag == "--since" and idx + 1 < len(args):
            since = args[idx + 1]
            idx += 2
        elif flag == "--dry-run":
            dry_run = True
            idx += 1
        else:
            # Unknown or incomplete flag: show usage and bail out.
            print(__doc__)
            sys.exit(1)
    if last is None and since is None:
        last = 100  # default window
    run_extraction(last=last, since=since, dry_run=dry_run)

if __name__ == "__main__":
    main()

214
cortex/llm_extractor.py Normal file
View file

@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
LLM-Powered Entity Extractor Uses Ollama for Named Entity Recognition.
Standalone module. No pip dependencies beyond stdlib.
Calls Ollama HTTP API with structured NER prompts.
Configuration via environment variables:
DARKPLEX_OLLAMA_URL Ollama base URL (default: http://localhost:11434)
DARKPLEX_OLLAMA_MODEL Model name (default: mistral:7b)
DARKPLEX_OLLAMA_TIMEOUT Timeout in seconds (default: 10)
DARKPLEX_EXTRACTOR llm|regex|auto (default: auto)
"""
import json
import logging
import os
import urllib.request
import urllib.error
log = logging.getLogger("llm-extractor")
OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "mistral:7b")
OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30"))
VALID_TYPES = {"person", "organization", "company", "project", "technology",
"location", "event", "concept", "product"}
NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object.
Each key is the entity name (lowercase), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words (the, system, message, etc.)
- Entity names should be lowercase, use hyphens for multi-word
- "context" is a 2-5 word description of the entity's role in the text
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no explanation
Text:
{text}
JSON:"""
BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object.
Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context".
Valid types: person, organization, company, project, technology, location, event, concept, product
Rules:
- Skip common/generic words
- "context" is a 2-5 word description
- If no entities found, return empty JSON object
- Return ONLY valid JSON, no markdown, no explanation
Texts:
{texts}
JSON:"""
def _call_ollama(prompt: str) -> str | None:
    """POST a prompt to Ollama's /api/generate endpoint.

    Returns the model's response text, or None on any connection,
    timeout, or parsing failure (logged as a warning).
    """
    body = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 1024},
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=OLLAMA_TIMEOUT) as resp:
            parsed = json.loads(resp.read().decode())
            return parsed.get("response", "")
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        log.warning(f"Ollama call failed: {e}")
        return None
    except Exception as e:
        log.warning(f"Ollama unexpected error: {e}")
        return None
def _parse_json_response(text: str) -> dict:
"""Extract JSON dict from LLM response, handling markdown fences etc."""
if not text:
return {}
# Strip markdown code fences
text = text.strip()
if text.startswith("```"):
lines = text.split("\n")
lines = [l for l in lines if not l.strip().startswith("```")]
text = "\n".join(lines)
# Find the JSON object
start = text.find("{")
if start == -1:
return {}
# Find matching closing brace
depth = 0
for i in range(start, len(text)):
if text[i] == "{":
depth += 1
elif text[i] == "}":
depth -= 1
if depth == 0:
try:
return json.loads(text[start:i + 1])
except json.JSONDecodeError:
return {}
return {}
def _normalize_entities(raw: dict) -> dict:
    """Normalize and validate extracted entities.

    Lowercases and hyphenates names, drops non-dict or badly-sized
    entries, maps common type aliases onto the canonical VALID_TYPES
    (unknown types become "concept"), and caps context at 100 chars.
    """
    # Common aliases the model emits, mapped onto canonical types.
    aliases = {"org": "organization", "tech": "technology", "loc": "location",
               "place": "location", "tool": "technology", "framework": "technology",
               "language": "technology", "app": "product", "software": "product",
               "service": "product", "group": "organization", "team": "organization"}
    normalized = {}
    for raw_name, info in raw.items():
        if not isinstance(info, dict):
            continue
        name = raw_name.strip().lower().replace("_", "-").replace(" ", "-")
        if not (2 <= len(name) <= 80):
            continue
        etype = info.get("type", "unknown").lower().strip()
        if etype not in VALID_TYPES:
            etype = aliases.get(etype, "concept")
        context = info.get("context", "")
        context = context[:100] if isinstance(context, str) else ""
        normalized[name] = {"type": etype, "context": context, "match": "llm"}
    return normalized
def extract_entities_llm(text: str) -> dict[str, dict] | None:
    """Run LLM NER over *text*.

    Returns {name: {type, context, match}} on success, {} for trivially
    short input, or None when Ollama is unavailable — the None signals
    the caller to fall back to regex extraction.
    """
    if not text or len(text) < 10:
        return {}
    # Keep the prompt bounded (context window / latency).
    prompt = NER_PROMPT.format(text=text[:2000])
    response = _call_ollama(prompt)
    if response is None:
        return None  # signal regex fallback
    return _normalize_entities(_parse_json_response(response))
def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None:
    """Run LLM NER over several texts with a single Ollama call.

    Returns the combined entity dict, {} when nothing usable was
    supplied, or None when Ollama is unavailable.
    """
    if not texts:
        return {}
    # Drop trivially short texts and cap each snippet at 500 chars.
    usable = [t[:500] for t in texts if t and len(t) >= 10]
    if not usable:
        return {}
    # Cap the batch so the prompt stays within a sane size.
    usable = usable[:10]
    numbered = "\n".join(f"[{i+1}] {snippet}" for i, snippet in enumerate(usable))
    response = _call_ollama(BATCH_PROMPT.format(texts=numbered))
    if response is None:
        return None
    return _normalize_entities(_parse_json_response(response))
def is_available() -> bool:
    """Return True when the Ollama HTTP API answers on /api/tags."""
    try:
        probe = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET")
        with urllib.request.urlopen(probe, timeout=3) as resp:
            return resp.status == 200
    except Exception:
        return False

701
cortex/loop.py Normal file
View file

@ -0,0 +1,701 @@
#!/usr/bin/env python3
"""
Darkplex Loop The single heartbeat of the intelligence pipeline.
One process. One loop. One state machine.
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
Each cycle:
1. INGEST Fetch new events from NATS (batch consumer pull)
2. EXTRACT Pull entities and relationships from events
3. BRIDGE Sync cortex outputs to knowledge engine
4. VERIFY Check that real output was produced
5. REPORT Update state, alert on failure
States:
RUNNING Everything nominal
DEGRADED A step failed, but loop continues with recovery attempts
EMERGENCY Critical failure, alerting
Usage:
darkplex loop # Run loop (default: 1h cycle)
darkplex loop --once # Single cycle, then exit
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
darkplex loop --status # Print current state and exit
darkplex loop --check # Check for new events, exit 0=new 1=none
"""
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
# ── Paths (configurable via env) ─────────────────────────────────────────────
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
SCRIPT_DIR = BASE_DIR / "scripts"
LEVEL4_DIR = SCRIPT_DIR / "level4"
LOG_DIR = BASE_DIR / "logs"
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))
DEFAULT_CYCLE_SECONDS = 3600 # 1 hour
ALERT_COOLDOWN = 3600 # 1 alert per hour max
log = logging.getLogger("darkplex-loop")
# ── State Machine ────────────────────────────────────────────────────────────
class LoopState:
    """Persistent state for the Darkplex Loop.

    Mirrors itself to STATE_FILE as JSON on every mutation so that status
    queries and process restarts see the latest cycle results.
    """

    def __init__(self):
        # Lifecycle: INIT -> RUNNING / DEGRADED / EMERGENCY.
        self.status = "INIT"
        self.cycle_count = 0
        self.last_cycle = None
        self.last_success = None
        self.last_failure = None
        self.last_alert = None
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}
        self.error = None
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self._load()

    def _load(self):
        # Restore previously persisted fields; unknown keys are ignored.
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                if hasattr(self, k):
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            pass

    def save(self):
        """Persist the full state dict to STATE_FILE as JSON."""
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle."""
        self.perf = perf
        self.perf_history.append(perf)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over last 10 cycles."""
        if not self.perf_history:
            return {}
        keys = self.perf_history[0].keys()
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / len(self.perf_history)) for k in keys}

    def record_success(self, step_results: dict):
        """Mark a successful cycle: reset the failure streak and persist."""
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        """Mark a failed cycle; escalate to EMERGENCY after 3 in a row."""
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """True when no alert has fired within ALERT_COOLDOWN seconds."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            return True

    def mark_alerted(self):
        """Record the alert time for cooldown bookkeeping."""
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
# ── Pipeline Steps ───────────────────────────────────────────────────────────
def _nats_cmd():
"""Build NATS CLI base command with auth."""
nats_bin = os.environ.get("NATS_BIN", "nats")
nats_url = os.environ.get("NATS_URL", "")
if nats_url:
return [nats_bin, "-s", nats_url]
return [nats_bin]
def check_new_events() -> int:
    """Return the number of pending events in the consumer.

    0 means nothing new; -1 means the check itself failed.
    """
    try:
        proc = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if proc.returncode != 0:
            return -1
        return json.loads(proc.stdout).get("num_pending", 0)
    except Exception as e:
        log.warning(f"check_new_events failed: {e}")
        return -1
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns a dict with "events" (decoded payloads), "total_scanned",
    "skipped", and optionally "skip_reason". Falls back to the slower
    sequential fetch when the batch pull fails or times out.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    # Check how many pending
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")
    events = []
    total_fetched = 0
    parse_errors = 0
    # Fetch in batches (pending < 0 means "unknown": try a full batch anyway)
    remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        batch_size = min(remaining, NATS_BATCH_SIZE)
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)
        # --raw output: one message payload per line, each expected to be JSON.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1
    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)
    # Update sequence tracking (get current stream seq from consumer info)
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        log.warning("Could not save last processed sequence")
    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Reads one message per `nats stream get` call, starting just after the
    last persisted sequence (bounded by NATS_BATCH_SIZE), and keeps only
    conversation_message_in payloads that decode cleanly.
    """
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        pass
    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}
    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)
    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            # Stream payloads are base64-encoded JSON.
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1
    # Persist progress even when some messages failed to decode.
    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass
    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Prefers the LLM extractor (Ollama) for high-importance texts; falls back
    to the regex extractor after repeated LLM failures or when disabled via
    DARKPLEX_EXTRACTOR. Persists entities/relationships to the knowledge
    files and updates the loop state counters.

    Returns a dict: {"extracted": int, "new_entities": int, "new_relationships": int}.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")
    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    # entity-manager.py has a dash in its filename, so it must be loaded via
    # importlib rather than a normal import.
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)
    # FIX: the darkplex-core merge moved llm_extractor under
    # cortex.intelligence — prefer the packaged import; keep the bare import
    # as a fallback for the legacy LEVEL4_DIR layout added to sys.path above.
    try:
        from cortex.intelligence.llm_extractor import extract_entities_llm_batch, is_available as llm_available
    except ImportError:
        from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")
    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    # Normalize each event payload to (text, importance) up front so the LLM
    # batch and the per-event loop below see identical inputs.
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        if isinstance(text, list):
            # Some payloads carry a list of content parts; flatten to a string.
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))
    # LLM batch extraction for qualifying texts (importance >= 0.4), in
    # chunks of 10, with a circuit breaker after 3 consecutive failures.
    llm_results = {}
    if llm_ok:
        batch_texts = [t for t, s in event_texts if t and s >= 0.4]
        if batch_texts:
            consecutive_fails = 0
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
            if llm_results:
                log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")
    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        if not text or score < 0.4:
            continue
        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Attribute LLM entities back to the texts they appear in, trying
            # a few hyphen/space variants of the normalized name.
            text_lower = text.lower()
            for name, info in llm_results.items():
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                known[name] = entities[name]
        # Co-occurrence relationships between entities in the same event,
        # capped at 4 partners per entity to bound quadratic blowup.
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1
    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)
    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
def step_bridge(state: LoopState) -> dict:
    """Step 3: Run knowledge bridge."""
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")
    script = SCRIPT_DIR / "knowledge-bridge.py"
    if not script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}
    proc = subprocess.run(
        [sys.executable, str(script), "sync"],
        capture_output=True, text=True, timeout=120,
    )
    if proc.returncode != 0:
        log.warning(f"BRIDGE: Failed — {proc.stderr[:200]}")
        return {"status": "failed", "error": proc.stderr[:200]}
    # Sum every "N new/bridged/added" count the bridge script reported.
    matches = (re.search(r"(\d+)\s+(?:new|bridged|added)", ln, re.I)
               for ln in proc.stdout.split("\n"))
    bridged = sum(int(m.group(1)) for m in matches if m)
    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: Verify output quality.

    Checks the knowledge files exist, parse, and are non-empty; flags a
    probable extraction breakage (many events, zero entities); and probes
    NATS reachability. Returns {"verdict": "PASS"|"FAIL", "issues": [...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")
    issues = []
    for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]:
        if not f.exists():
            issues.append(f"{label} file missing")
        else:
            try:
                data = json.loads(f.read_text())
                if not data:
                    issues.append(f"{label} file is empty")
            except json.JSONDecodeError:
                issues.append(f"{label} file is corrupt JSON")
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")
    try:
        # FIX: use _nats_cmd() like every other NATS invocation in this module
        # so the reachability probe targets the same server/context instead of
        # the bare `nats` default.
        r = subprocess.run(_nats_cmd() + ["stream", "ls", "--json"], capture_output=True, text=True, timeout=10)
        if r.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")
    verdict = "PASS" if not issues else "FAIL"
    log.info(f"VERIFY: {verdict}{len(issues)} issues")
    for issue in issues:
        log.warning(f"{issue}")
    return {"verdict": verdict, "issues": issues}
def step_report(state: LoopState, verify_result: dict):
    """Step 5: Alert if degraded/emergency.

    No-op while RUNNING, and respects the state's alert cooldown. Alerting
    is best-effort: neither the alert subprocess nor the flag write may fail
    the cycle.
    """
    if state.status == "RUNNING":
        return
    if not state.can_alert():
        log.info("REPORT: Alert cooldown active, skipping")
        return
    severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
    msg = (
        f"Darkplex Loop {severity}\n"
        f"Consecutive failures: {state.consecutive_failures}\n"
        f"Error: {state.error}\n"
        f"Issues: {', '.join(verify_result.get('issues', []))}"
    )
    log.warning(f"REPORT: Sending alert — {state.status}")
    try:
        # FIX: sys.executable instead of "python3" — consistent with
        # step_bridge, and runs the alert under the same interpreter/venv.
        subprocess.run(
            [sys.executable, str(SCRIPT_DIR / "vera-alert.py"), msg],
            capture_output=True, text=True, timeout=15,
        )
    except Exception:
        pass
    # FIX: flag write is best-effort too — previously an unwritable LOG_DIR
    # raised out of step_report and could fail the whole cycle before
    # mark_alerted() ran.
    try:
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        flag.write_text(f"{datetime.now().isoformat()} {state.status}: {state.error}")
    except Exception:
        pass
    state.mark_alerted()
# ── Helpers ──────────────────────────────────────────────────────────────────
def _importance(text: str) -> float:
"""Importance scoring for event text."""
if not text:
return 0.0
score = 0.3
if len(text) > 200: score += 0.1
if len(text) > 500: score += 0.1
caps = len(re.findall(r"\b[A-Z][a-z]+\b", text))
if caps > 3: score += 0.1
if caps > 8: score += 0.1
for p in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
if p.lower() in text.lower():
score -= 0.3
for w in ["meeting", "project", "company", "contract", "decision", "strategy",
"budget", "deadline", "milestone", "partnership", "investment", "revenue",
"client", "proposal", "agreement"]:
if w in text.lower():
score += 0.05
return max(0.0, min(1.0, score))
def print_status():
    """Print current loop state."""
    state = LoopState()
    counts = {}
    for label, path in (("ent", ENTITIES_FILE), ("rel", RELATIONSHIPS_FILE)):
        try:
            counts[label] = len(json.loads(path.read_text()))
        except Exception:
            counts[label] = 0
    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "")
    print(f"{icon} Status: {state.status}")
    print(f"Cycles: {state.cycle_count}")
    print(f"Last cycle: {state.last_cycle or 'never'}")
    print(f"Last success: {state.last_success or 'never'}")
    print(f"Last failure: {state.last_failure or 'never'}")
    print(f"Failures: {state.consecutive_failures}")
    print(f"Entities: {counts['ent']} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)")
    print(f"Relationships:{counts['rel']} total")
    if state.error:
        print(f"Error: {state.error}")
# ── Main Loop ────────────────────────────────────────────────────────────────
def _ms_since(t0: float) -> int:
return int((time.monotonic() - t0) * 1000)
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success.

    Pipeline: ingest → extract → bridge → verify, with per-step timing
    recorded into the state's perf history. A cycle with no new events is a
    successful early exit. A FAIL verdict from verify only fails the cycle
    when the issues indicate real breakage (broken/missing/corrupt).
    """
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()
    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}
        # Early skip if no new events — still counts as a successful cycle.
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True
        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract
        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge
        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Only hard breakage fails the cycle; soft verify issues (e.g. NATS
        # probe hiccups) are reported but don't count as a failure.
        if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]):
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False
        state.record_success(step_results)
        avgs = state.perf_averages()
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status}{perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")
        # A successful cycle clears any standing alert flag.
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()
        return True
    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # The first step missing from step_results is the one that raised.
        step_name = "unknown"
        for name in ["ingest", "extract", "bridge", "verify"]:
            if name not in step_results:
                step_name = name
                break
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
def main():
    """CLI entry point for `darkplex loop`.

    Flags: --status (print state and exit), --check (exit 0/1/2 for
    new/none/error), --once (single cycle), --cycle N (seconds between
    cycles).
    """
    # FIX: create the log directory BEFORE logging.basicConfig —
    # logging.FileHandler opens its file at construction and raises
    # FileNotFoundError if LOG_DIR does not exist yet (first run).
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )
    args = sys.argv[1:]
    if "--status" in args:
        print_status()
        return
    if "--check" in args:
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)
    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS
    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])
    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")
    running = True

    def handle_signal(sig, frame):
        # Graceful shutdown: finish the current cycle, then stop.
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    while running:
        run_cycle(state)
        if once:
            break
        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1s slices so shutdown signals are honored promptly.
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)
    log.info("Darkplex Loop stopped")

View file

@ -3,9 +3,9 @@ requires = ["setuptools>=68.0", "wheel"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project] [project]
name = "cortex" name = "darkplex-core"
version = "0.1.0" version = "0.2.0"
description = "Intelligence layer for OpenClaw — triage, health, feedback, memory hygiene, roadmap, validation" description = "Darkplex Intelligence Layer — triage, health, feedback, governance, knowledge extraction, memory hygiene, roadmap, validation"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
license = {text = "MIT"} license = {text = "MIT"}
@ -15,6 +15,7 @@ authors = [
[project.scripts] [project.scripts]
cortex = "cortex.cli:main" cortex = "cortex.cli:main"
darkplex = "cortex.cli:main"
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
include = ["cortex*"] include = ["cortex*"]

106
tests/test_anticipator.py Normal file
View file

@ -0,0 +1,106 @@
"""Tests for intelligence/anticipator module."""
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from cortex.intelligence.anticipator import (
AlertSeverity,
Anticipator,
PatternDefinition,
Prediction,
_detect_recurring_errors,
_detect_ssl_expiry,
_detect_usage_spike,
)
class TestAnticipatorInit:
    """Anticipator construction: built-in patterns plus custom registration."""

    def test_creates_with_builtin_patterns(self):
        # A fresh Anticipator ships with 3 built-in patterns.
        a = Anticipator()
        assert len(a.patterns) == 3

    def test_register_custom_pattern(self):
        a = Anticipator()
        p = PatternDefinition(name="test", description="test", detector=lambda e: None)
        a.register_pattern(p)
        assert len(a.patterns) == 4
class TestAnalyze:
    """Anticipator.analyze: empty input, no matches, and detector errors."""

    def test_empty_events(self):
        a = Anticipator()
        result = a.analyze([])
        assert result == []

    def test_no_matching_patterns(self):
        a = Anticipator()
        result = a.analyze([{"type": "unrelated", "data": {}}])
        assert result == []

    def test_detector_exception_handled(self):
        # A raising detector must be swallowed, not propagate to the caller.
        def bad_detector(events):
            raise RuntimeError("boom")
        a = Anticipator()
        a.patterns = [PatternDefinition(name="bad", description="", detector=bad_detector)]
        result = a.analyze([{}])
        assert result == []
class TestSSLExpiry:
    """_detect_ssl_expiry: severity thresholds by days-to-expiry."""

    def test_no_ssl_events(self):
        assert _detect_ssl_expiry([{"type": "other"}]) is None

    def test_expiring_soon(self):
        # ~5 days out → WARNING.
        expiry = (datetime.now(timezone.utc) + timedelta(days=5)).isoformat()
        events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
        result = _detect_ssl_expiry(events)
        assert result is not None
        assert result.severity == AlertSeverity.WARNING

    def test_expiring_critical(self):
        # ~1 day out → CRITICAL.
        expiry = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat()
        events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
        result = _detect_ssl_expiry(events)
        assert result.severity == AlertSeverity.CRITICAL

    def test_not_expiring(self):
        # 60 days out is not worth a prediction.
        expiry = (datetime.now(timezone.utc) + timedelta(days=60)).isoformat()
        events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
        assert _detect_ssl_expiry(events) is None
class TestRecurringErrors:
    """_detect_recurring_errors: fires only above a repetition threshold."""

    def test_no_errors(self):
        assert _detect_recurring_errors([]) is None

    def test_few_errors(self):
        # 2 repeats of the same error_type is below the threshold.
        events = [{"type": "error", "data": {"error_type": "timeout"}}] * 2
        assert _detect_recurring_errors(events) is None

    def test_recurring_detected(self):
        events = [{"type": "error", "data": {"error_type": "timeout"}}] * 5
        result = _detect_recurring_errors(events)
        assert result is not None
        assert result.metadata["count"] == 5
class TestUsageSpike:
    """_detect_usage_spike: needs enough samples and a clear outlier tail."""

    def test_insufficient_data(self):
        assert _detect_usage_spike([]) is None

    def test_normal_usage(self):
        events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(15)]
        assert _detect_usage_spike(events) is None

    def test_spike_detected(self):
        # Flat baseline of 10 with the last three samples at 100.
        events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(12)]
        events[-1]["data"]["value"] = 100
        events[-2]["data"]["value"] = 100
        events[-3]["data"]["value"] = 100
        result = _detect_usage_spike(events)
        assert result is not None

112
tests/test_collective.py Normal file
View file

@ -0,0 +1,112 @@
"""Tests for intelligence/collective module."""
import asyncio
import sys
from pathlib import Path
from unittest import mock
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from cortex.intelligence.shared_memory import Insight, SharedMemory, ALLOWED_AGENTS
from cortex.intelligence.collective import AggregatedPattern, CollectiveLearning
class TestCollectiveLearningInit:
    """CollectiveLearning starts with no patterns and no insights."""

    def test_init(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        assert cl._patterns == []
        assert len(cl._insights_by_topic) == 0
class TestPatternDetection:
    """_detect_patterns requires corroboration from more than one agent."""

    def test_no_patterns_with_single_agent(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        # Add insights from same agent
        agent = list(ALLOWED_AGENTS)[0]
        for i in range(5):
            cl._insights_by_topic["infra"].append(
                Insight(agent=agent, topic="infra", content=f"test {i}")
            )
        cl._detect_patterns()
        assert len(cl._patterns) == 0

    def test_pattern_with_multiple_agents(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agents = list(ALLOWED_AGENTS)[:2]
        cl._insights_by_topic["infra"].append(
            Insight(agent=agents[0], topic="infra", content="observation 1")
        )
        cl._insights_by_topic["infra"].append(
            Insight(agent=agents[1], topic="infra", content="observation 2")
        )
        cl._detect_patterns()
        assert len(cl._patterns) == 1
        assert cl._patterns[0].topic == "infra"
class TestGetPatterns:
    """get_patterns filtering by topic and by minimum confidence."""

    def test_filter_by_topic(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agents = list(ALLOWED_AGENTS)[:2]
        for topic in ["infra", "security"]:
            for agent in agents:
                cl._insights_by_topic[topic].append(
                    Insight(agent=agent, topic=topic, content="test")
                )
        cl._detect_patterns()
        assert len(cl.get_patterns(topic="infra")) == 1

    def test_filter_by_confidence(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agents = list(ALLOWED_AGENTS)[:2]
        # Two low-confidence insights form a pattern below the 0.5 cutoff.
        cl._insights_by_topic["low"].append(
            Insight(agent=agents[0], topic="low", content="x", confidence=0.1)
        )
        cl._insights_by_topic["low"].append(
            Insight(agent=agents[1], topic="low", content="y", confidence=0.1)
        )
        cl._detect_patterns()
        assert len(cl.get_patterns(min_confidence=0.5)) == 0
class TestTopicSummary:
    """get_topic_summary on a fresh instance is empty."""

    def test_empty(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        assert cl.get_topic_summary() == {}
class TestExportKnowledge:
    """export_knowledge emits JSON with the expected top-level keys."""

    def test_export_json(self):
        import json
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        data = json.loads(cl.export_knowledge())
        assert "patterns" in data
        assert "topics" in data
        assert "allowed_agents" in data
class TestHandleInsight:
    """_handle_insight enforces the ALLOWED_AGENTS allowlist."""

    @pytest.mark.asyncio
    async def test_rejects_non_allowed_agent(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        insight = Insight(agent="unauthorized_agent", topic="test", content="bad")
        await cl._handle_insight(insight)
        # Rejected insights must leave no trace in the topic index.
        assert len(cl._insights_by_topic) == 0

    @pytest.mark.asyncio
    async def test_accepts_allowed_agent(self):
        sm = mock.AsyncMock(spec=SharedMemory)
        cl = CollectiveLearning(sm)
        agent = list(ALLOWED_AGENTS)[0]
        insight = Insight(agent=agent, topic="test", content="good")
        await cl._handle_insight(insight)
        assert len(cl._insights_by_topic["test"]) == 1

View file

@ -0,0 +1,111 @@
"""Tests for entity_manager module."""
import json
import sys
import tempfile
from pathlib import Path
from unittest import mock
import pytest
# Add parent to path
sys.path.insert(0, str(Path(__file__).parent.parent))
import cortex.entity_manager as em
class TestNormalize:
    """normalize: lowercase, underscores→hyphens, trimmed whitespace."""

    def test_basic(self):
        assert em.normalize("Hello World") == "hello world"

    def test_underscores(self):
        assert em.normalize("my_entity") == "my-entity"

    def test_whitespace(self):
        assert em.normalize("  test  ") == "test"
class TestLoadJson:
    """load_json: missing or invalid files degrade to an empty dict."""

    def test_missing_file(self):
        assert em.load_json(Path("/nonexistent/file.json")) == {}

    def test_valid_json(self, tmp_path):
        f = tmp_path / "test.json"
        f.write_text('{"key": "value"}')
        assert em.load_json(f) == {"key": "value"}

    def test_invalid_json(self, tmp_path):
        f = tmp_path / "bad.json"
        f.write_text("not json")
        assert em.load_json(f) == {}
class TestSaveJson:
    """save_json creates missing parent directories before writing."""

    def test_creates_dirs(self, tmp_path):
        f = tmp_path / "sub" / "dir" / "test.json"
        em.save_json(f, {"hello": "world"})
        assert json.loads(f.read_text()) == {"hello": "world"}
class TestExtractEntities:
    """extract_entities: known names, @mentions, capitalized phrases, acronyms,
    stop-word and short-mention filtering."""

    def test_known_entity(self):
        known = {"acme-corp": {"type": "company"}}
        result = em.extract_entities("Working with Acme Corp today", known)
        assert "acme-corp" in result

    def test_mention(self):
        result = em.extract_entities("Talked to @johndoe about it", {})
        assert "johndoe" in result
        assert result["johndoe"]["type"] == "person"

    def test_capitalized_multi_word(self):
        result = em.extract_entities("Met with John Smith yesterday", {})
        assert "john smith" in result

    def test_acronym(self):
        result = em.extract_entities("The ACME project is going well", {})
        assert "acme" in result
        assert result["acme"]["type"] == "organization"

    def test_stop_words_filtered(self):
        result = em.extract_entities("The system is working fine", {})
        # None of these should be extracted as entities
        for word in ["the", "system", "working"]:
            assert word not in result

    def test_empty_text(self):
        result = em.extract_entities("", {})
        assert result == {}

    def test_short_mention_filtered(self):
        """Mentions shorter than 3 chars should be filtered."""
        result = em.extract_entities("@ab said hi", {})
        assert "ab" not in result
class TestCmdBootstrap:
    """cmd_bootstrap writes an entities file even when life/areas is empty."""

    def test_bootstrap_with_empty_areas(self, tmp_path):
        # Patch all file locations into tmp_path so no real state is touched.
        with mock.patch.object(em, "LIFE_AREAS", tmp_path):
            with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
                with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"):
                    em.cmd_bootstrap()
                    assert (tmp_path / "entities.json").exists()
class TestCmdRelate:
    """cmd_relate creates a relationship and increments count on repeats."""

    def test_create_relationship(self, tmp_path):
        with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"):
            with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
                em.cmd_relate("Alice", "Bob", "colleague")
                rels = json.loads((tmp_path / "rels.json").read_text())
                assert len(rels) == 1
                key = list(rels.keys())[0]
                assert "colleague" in rels[key]["types"]

    def test_update_relationship(self, tmp_path):
        with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"):
            with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
                em.cmd_relate("Alice", "Bob", "colleague")
                em.cmd_relate("Alice", "Bob", "friend")
                rels = json.loads((tmp_path / "rels.json").read_text())
                key = list(rels.keys())[0]
                # Second relate on the same pair bumps the count.
                assert rels[key]["count"] == 2

View file

@ -0,0 +1,79 @@
"""Tests for governance/enforcer.py — Runtime Enforcer."""
import sys
from pathlib import Path
import yaml
import pytest
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.enforcer import Enforcer, Decision
from governance.policy import PolicyEngine
from governance.risk_scorer import RiskScorer
from governance.evidence import EvidenceCollector, ControlMapping
def _make_enforcer(tmp_path, rules=None):
    """Build an Enforcer backed by tmp_path; optionally seed one policy file."""
    if rules:
        (tmp_path / "test.yaml").write_text(yaml.dump({
            "name": "test", "description": "", "version": "1",
            "rules": rules,
        }))
    return Enforcer(
        policy_engine=PolicyEngine(policies_dir=str(tmp_path)),
        risk_scorer=RiskScorer(),
        evidence_collector=EvidenceCollector(control_mapping=ControlMapping("/dev/null")),
    )
class TestDecision:
    """Decision.approved reflects the verdict string."""

    def test_approved(self):
        from governance.risk_scorer import RiskResult
        d = Decision(verdict="approve", reason="ok", risk=RiskResult(0, "low", {}), policy_result={})
        assert d.approved

    def test_not_approved(self):
        from governance.risk_scorer import RiskResult
        d = Decision(verdict="deny", reason="no", risk=RiskResult(9, "critical", {}), policy_result={})
        assert not d.approved
class TestEnforcer:
    """Enforcer.evaluate: default allow, policy deny, risk override,
    evidence recording, and the data_classification alias."""

    def test_default_allow(self, tmp_path):
        enforcer = _make_enforcer(tmp_path)
        decision = enforcer.evaluate({"agent": "claudia", "action": "read", "hour": 12})
        assert decision.verdict == "allow"

    def test_policy_deny(self, tmp_path):
        enforcer = _make_enforcer(tmp_path, rules=[
            {"name": "deny-ext", "conditions": {"target": "external"}, "effect": "deny", "priority": 10},
        ])
        decision = enforcer.evaluate({"agent": "claudia", "action": "send", "target": "external", "hour": 12})
        assert decision.verdict == "deny"

    def test_risk_override(self, tmp_path):
        """High risk should override an allow policy to escalate."""
        enforcer = _make_enforcer(tmp_path, rules=[
            {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1},
        ])
        decision = enforcer.evaluate({
            "agent": "claudia", "action": "export",
            "data_type": "restricted", "target": "external", "hour": 12,
        })
        # Risk should be high/critical, overriding the allow
        assert decision.verdict in ("deny", "escalate")

    def test_evidence_recorded(self, tmp_path):
        # Every evaluate() call appends exactly one evidence record.
        enforcer = _make_enforcer(tmp_path)
        enforcer.evaluate({"agent": "test", "action": "read", "hour": 12})
        assert len(enforcer.evidence_collector.evidence) == 1

    def test_data_classification_alias(self, tmp_path):
        enforcer = _make_enforcer(tmp_path)
        decision = enforcer.evaluate({
            "agent": "test", "action": "read",
            "data_classification": "confidential", "hour": 12,
        })
        # Should use data_classification as data_type
        assert decision.risk.factors["data_type"]["value"] == "confidential"

View file

@ -0,0 +1,86 @@
"""Tests for governance/evidence.py — Evidence Collector & Control Mapping."""
import json
import sys
from pathlib import Path
import yaml
import pytest
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.evidence import EvidenceCollector, EvidenceRecord, ControlMapping
class TestControlMapping:
    """ControlMapping: missing file degrades to no controls; YAML mapping
    resolves event types to control IDs."""

    def test_missing_file(self):
        m = ControlMapping(mapping_path="/nonexistent/path.yaml")
        assert m.get_controls("anything") == []

    def test_load_mapping(self, tmp_path):
        mapping_file = tmp_path / "mapping.yaml"
        mapping_file.write_text(yaml.dump({
            "mappings": [
                {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]},
                {"event_types": ["access_request", "data_export"], "controls": ["A.9.1"]},
            ]
        }))
        m = ControlMapping(mapping_path=str(mapping_file))
        assert "A.5.1" in m.get_controls("policy_evaluation")
        assert "A.9.1" in m.get_controls("access_request")
        assert m.get_controls("unknown_event") == []
class TestEvidenceRecord:
    """EvidenceRecord.to_dict round-trips the record fields."""

    def test_to_dict(self):
        r = EvidenceRecord(
            timestamp="2026-01-01T00:00:00Z",
            event_type="test",
            agent="claudia",
            action="read",
            verdict="allow",
            risk_score=2,
            risk_level="low",
            controls=["A.5.1"],
        )
        d = r.to_dict()
        assert d["agent"] == "claudia"
        assert d["controls"] == ["A.5.1"]
class TestEvidenceCollector:
    """EvidenceCollector: record, filter by agent/verdict, JSON export."""

    def setup_method(self):
        # /dev/null mapping → collector runs with no control mapping loaded.
        self.collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))

    def test_record(self):
        rec = self.collector.record(
            event_type="policy_evaluation",
            agent="claudia",
            action="send_email",
            verdict="allow",
            risk_score=3,
            risk_level="low",
        )
        assert rec.agent == "claudia"
        assert len(self.collector.evidence) == 1

    def test_filter_by_agent(self):
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        self.collector.record(event_type="e", agent="b", action="x", verdict="deny")
        assert len(self.collector.get_evidence(agent="a")) == 1

    def test_filter_by_verdict(self):
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        self.collector.record(event_type="e", agent="a", action="y", verdict="deny")
        assert len(self.collector.get_evidence(verdict="deny")) == 1

    def test_export_json(self):
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        exported = self.collector.export_json()
        data = json.loads(exported)
        assert len(data) == 1
        assert data[0]["agent"] == "a"

    def test_empty_evidence(self):
        assert self.collector.get_evidence() == []
        assert json.loads(self.collector.export_json()) == []

View file

@ -0,0 +1,126 @@
"""Tests for governance/policy.py — Policy Engine.
NOTE: This module exists only in darkplex-core. Tests written against the module API.
"""
import os
import tempfile
import pytest
from pathlib import Path
# We need yaml for creating test fixtures
import yaml
def _write_policy(tmpdir, filename, data):
    """Serialize *data* as YAML to tmpdir/filename and return the path."""
    target = Path(tmpdir) / filename
    target.write_text(yaml.dump(data))
    return target
class TestRule:
    """Rule.matches: equality, list membership, missing keys, AND semantics."""

    def setup_method(self):
        # Import at setup time so the darkplex-core path is on sys.path first.
        import sys
        sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
        from governance.policy import Rule
        self.Rule = Rule

    def test_matches_simple(self):
        r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert r.matches({"agent": "claudia"})

    def test_no_match(self):
        r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert not r.matches({"agent": "other"})

    def test_missing_key(self):
        # A condition key absent from the context is a non-match.
        r = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert not r.matches({})

    def test_list_condition(self):
        # A list condition matches any of its members.
        r = self.Rule(name="r1", conditions={"action": ["read", "write"]}, effect="allow")
        assert r.matches({"action": "read"})
        assert not r.matches({"action": "delete"})

    def test_multiple_conditions(self):
        # All conditions must hold (AND).
        r = self.Rule(name="r1", conditions={"agent": "claudia", "action": "send"}, effect="deny")
        assert r.matches({"agent": "claudia", "action": "send"})
        assert not r.matches({"agent": "claudia", "action": "read"})
class TestPolicyEngine:
    """PolicyEngine: loading, default-allow evaluation, priority ordering,
    reload, and skipping schema.yaml."""

    def setup_method(self):
        import sys
        sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
        from governance.policy import PolicyEngine
        self.PolicyEngine = PolicyEngine

    def test_empty_dir(self, tmp_path):
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert engine.policies == []

    def test_nonexistent_dir(self, tmp_path):
        engine = self.PolicyEngine(policies_dir=str(tmp_path / "nope"))
        assert engine.policies == []

    def test_load_policy(self, tmp_path):
        _write_policy(tmp_path, "test.yaml", {
            "name": "test-policy",
            "description": "Test",
            "version": "1.0.0",
            "rules": [
                {"name": "deny-external", "conditions": {"target": "external"}, "effect": "deny", "priority": 10},
            ],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert len(engine.policies) == 1
        assert engine.policies[0].name == "test-policy"
        assert len(engine.policies[0].rules) == 1

    def test_evaluate_no_match(self, tmp_path):
        # No matching rule → default allow.
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [{"name": "r1", "conditions": {"agent": "x"}, "effect": "deny"}],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        result = engine.evaluate({"agent": "y"})
        assert result["verdict"] == "allow"

    def test_evaluate_match_deny(self, tmp_path):
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [{"name": "r1", "conditions": {"target": "external"}, "effect": "deny", "priority": 5}],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        result = engine.evaluate({"target": "external"})
        assert result["verdict"] == "deny"

    def test_priority_ordering(self, tmp_path):
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [
                {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1},
                {"name": "deny-ext", "conditions": {"agent": "claudia"}, "effect": "deny", "priority": 10},
            ],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        result = engine.evaluate({"agent": "claudia"})
        assert result["verdict"] == "deny"  # higher priority wins

    def test_reload(self, tmp_path):
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert len(engine.policies) == 0
        _write_policy(tmp_path, "new.yaml", {
            "name": "new", "description": "", "version": "1", "rules": [],
        })
        engine.reload()
        assert len(engine.policies) == 1

    def test_skips_schema_yaml(self, tmp_path):
        # schema.yaml is reserved for the policy schema and must not load.
        _write_policy(tmp_path, "schema.yaml", {"name": "schema"})
        _write_policy(tmp_path, "real.yaml", {
            "name": "real", "description": "", "version": "1", "rules": [],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert len(engine.policies) == 1
        assert engine.policies[0].name == "real"

View file

@ -0,0 +1,57 @@
"""Tests for governance/report_generator.py."""
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.evidence import EvidenceCollector, ControlMapping
from governance.report_generator import ReportGenerator
class TestReportGenerator:
    """Tests for ReportGenerator compliance/agent reports and JSON export."""

    def _collector_with_mapping(self, tmp_path):
        # Helper: collector whose control mapping routes policy_evaluation
        # events onto the controls A.5.1 and A.8.1.
        import yaml
        mapping_file = tmp_path / "mapping.yaml"
        mapping_file.write_text(yaml.dump({
            "mappings": [
                {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]},
            ]
        }))
        return EvidenceCollector(control_mapping=ControlMapping(str(mapping_file)))

    def test_empty_report(self):
        # No recorded evidence -> report carries status "no_evidence".
        collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))
        gen = ReportGenerator(collector)
        report = gen.generate_compliance_report()
        assert report["status"] == "no_evidence"

    def test_report_with_evidence(self, tmp_path):
        collector = self._collector_with_mapping(tmp_path)
        collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow", risk_score=2, risk_level="low")
        collector.record(event_type="policy_evaluation", agent="claudia", action="write", verdict="deny", risk_score=8, risk_level="high")
        gen = ReportGenerator(collector)
        report = gen.generate_compliance_report()
        assert report["total_evidence"] == 2
        assert "A.5.1" in report["controls_covered"]
        assert report["summary"]["total_deny"] == 1
        assert report["summary"]["high_risk_events"] == 1

    def test_agent_report(self, tmp_path):
        # The per-agent report only counts actions of the requested agent.
        collector = self._collector_with_mapping(tmp_path)
        collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow")
        collector.record(event_type="policy_evaluation", agent="other", action="read", verdict="deny")
        gen = ReportGenerator(collector)
        report = gen.generate_agent_report("claudia")
        assert report["agent"] == "claudia"
        assert report["total_actions"] == 1

    def test_export_json(self):
        # export_json must emit parseable JSON even with no evidence.
        collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))
        gen = ReportGenerator(collector)
        output = gen.export_json()
        data = json.loads(output)
        assert "status" in data  # empty report

View file

@ -0,0 +1,80 @@
"""Tests for governance/risk_scorer.py."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from governance.risk_scorer import RiskScorer, RiskResult, _classify_level
class TestClassifyLevel:
    """Spot-check the score-to-level bands at their observed edges."""

    def test_low(self):
        for score in (0, 3):
            assert _classify_level(score) == "low"

    def test_elevated(self):
        for score in (4, 6):
            assert _classify_level(score) == "elevated"

    def test_high(self):
        for score in (7, 8):
            assert _classify_level(score) == "high"

    def test_critical(self):
        for score in (9, 10):
            assert _classify_level(score) == "critical"
class TestRiskResult:
    """is_acceptable should track whether the risk value is low enough."""

    def test_acceptable(self):
        low_risk = RiskResult(value=3, level="low", factors={})
        assert low_risk.is_acceptable

    def test_not_acceptable(self):
        high_risk = RiskResult(value=7, level="high", factors={})
        assert not high_risk.is_acceptable
class TestRiskScorer:
    """Factor-weighting behaviour of RiskScorer.score()."""

    def setup_method(self):
        self.scorer = RiskScorer()

    def test_default_low_risk(self):
        # Daytime activity with no risky factors scores "low".
        result = self.scorer.score({"hour": 12})
        assert result.level == "low"
        assert result.is_acceptable

    def test_public_internal(self):
        result = self.scorer.score({"data_type": "public", "target": "internal", "hour": 12})
        assert result.value <= 3

    def test_confidential_external(self):
        # Confidential data going to an external target is high risk.
        result = self.scorer.score({"data_type": "confidential", "target": "external", "hour": 12})
        assert result.value >= 7

    def test_restricted_critical(self):
        result = self.scorer.score({"data_type": "restricted", "target": "external", "hour": 12})
        assert result.level in ("high", "critical")

    def test_off_hours_bonus(self):
        # The same action at 02:00 must score higher than at noon.
        day = self.scorer.score({"data_type": "internal", "hour": 12})
        night = self.scorer.score({"data_type": "internal", "hour": 2})
        assert night.value > day.value

    def test_admin_role_reduces_risk(self):
        admin = self.scorer.score({"agent_role": "admin", "hour": 12})
        external = self.scorer.score({"agent_role": "external", "hour": 12})
        assert admin.value < external.value

    def test_factors_populated(self):
        # Every weighted factor appears in the result breakdown.
        result = self.scorer.score({"data_type": "internal", "target": "external", "hour": 10})
        assert "data_type" in result.factors
        assert "target" in result.factors
        assert "agent_role" in result.factors
        assert "time_of_day" in result.factors

    def test_clamped_0_10(self):
        # Even with extreme values, should be 0-10
        result = self.scorer.score({"data_type": "restricted", "target": "external", "agent_role": "external", "hour": 3})
        assert 0 <= result.value <= 10

View file

@ -0,0 +1,136 @@
"""Tests for intelligence/knowledge_cleanup.py — Knowledge Graph Cleanup."""
import json
import math
import sys
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
import knowledge_cleanup as kc
class TestBackup:
    """kc.backup() must produce an on-disk copy with a backup_ name marker."""

    def test_creates_backup(self, tmp_path):
        source = tmp_path / "test.json"
        source.write_text('{"a": 1}')
        result = kc.backup(source)
        # The returned path must exist and be identifiable as a backup.
        assert result.exists()
        assert "backup_" in result.name
class TestAtomicWrite:
    """kc.atomic_write() must persist JSON that round-trips unchanged."""

    def test_writes_atomically(self, tmp_path):
        target = tmp_path / "out.json"
        kc.atomic_write(target, {"key": "value"})
        on_disk = json.loads(target.read_text())
        assert on_disk == {"key": "value"}
class TestFindDuplicates:
    """Duplicate-entity detection (case-insensitive and substring matches)."""

    def test_no_duplicates(self):
        entities = {"albert": {}, "mondo-gate": {}}
        groups = kc.find_duplicates(entities)
        assert len(groups) == 0

    def test_case_duplicates(self):
        # Names differing only in case collapse into one duplicate group.
        entities = {"Albert": {}, "albert": {}, "ALBERT": {}}
        groups = kc.find_duplicates(entities)
        assert len(groups) >= 1

    def test_substring_duplicates(self):
        # "mondo" vs "mondo gate" with the same type should be grouped.
        entities = {"mondo": {"type": "company"}, "mondo gate": {"type": "company"}}
        groups = kc.find_duplicates(entities)
        assert len(groups) >= 1
class TestPickCanonical:
    """Canonical-name selection among duplicate entity records."""

    def test_prefers_uppercase(self):
        # Capitalised spelling wins over lowercase.
        names = ["albert", "Albert"]
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}}
        assert kc.pick_canonical(names, entities) == "Albert"

    def test_prefers_more_fields(self):
        # The record carrying more metadata fields wins.
        names = ["a", "A"]
        entities = {"a": {"type": "person"}, "A": {"type": "person", "source": "x", "extra": "y"}}
        assert kc.pick_canonical(names, entities) == "A"
class TestDeduplicate:
    """End-to-end dedupe: merging, dry-run safety, relationship remapping."""

    def test_merges_entities(self):
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}}
        rels = {}
        e, r = kc.deduplicate(entities, rels, dry_run=False)
        assert len(e) == 1

    def test_dry_run_no_change(self):
        # dry_run=True must report but never mutate the entity set.
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}}
        rels = {}
        e, r = kc.deduplicate(entities, rels, dry_run=True)
        assert len(e) == 2  # unchanged in dry run

    def test_updates_relationships(self):
        entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}}
        rels = {
            "albert::mondo": {"a": "albert", "b": "mondo", "types": ["co-occurrence"], "count": 1, "first_seen": "2026-01-01", "last_seen": "2026-01-01"},
        }
        e, r = kc.deduplicate(entities, rels, dry_run=False)
        # Relationship should be remapped to canonical
        assert len(r) == 1
class TestScoreRelationships:
    """Strength scoring and pruning of relationship edges."""

    def test_scores_assigned(self):
        rels = {
            "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat(), "first_seen": "2026-01-01"},
        }
        result = kc.score_relationships(rels, dry_run=False)
        # A fresh, frequently-seen edge gets a strength in (0, 1].
        assert "strength" in result["a::b"]
        assert 0 < result["a::b"]["strength"] <= 1

    def test_removes_weak(self):
        old_date = (datetime.now() - timedelta(days=300)).isoformat()
        rels = {
            "a::b": {"count": 1, "types": ["co-occurrence"], "last_seen": old_date, "first_seen": old_date},
        }
        result = kc.score_relationships(rels, dry_run=False)
        # Very old + low count should have low strength
        if len(result) > 0:
            assert result["a::b"]["strength"] < 0.3

    def test_dry_run(self):
        rels = {
            "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat()},
        }
        result = kc.score_relationships(rels, dry_run=True)
        # Dry run must not annotate the input with strengths.
        assert "strength" not in result["a::b"]
class TestClassifyUnknowns:
    """LLM-backed reclassification of entities typed "unknown"."""

    @patch("knowledge_cleanup.ollama_generate")
    def test_no_unknowns(self, mock_ollama):
        # Nothing to classify -> the LLM must not be called at all.
        entities = {"albert": {"type": "person"}}
        result = kc.classify_unknowns(entities, dry_run=False)
        mock_ollama.assert_not_called()
        assert result == entities

    @patch("knowledge_cleanup.ollama_generate")
    def test_classifies_unknowns(self, mock_ollama):
        # The LLM answers with an index->type mapping that is applied.
        mock_ollama.return_value = '{"1": "person"}'
        entities = {"albert": {"type": "unknown"}}
        result = kc.classify_unknowns(entities, dry_run=False)
        assert result["albert"]["type"] == "person"

    @patch("knowledge_cleanup.ollama_generate")
    def test_dry_run_no_change(self, mock_ollama):
        mock_ollama.return_value = '{"1": "person"}'
        entities = {"albert": {"type": "unknown"}}
        result = kc.classify_unknowns(entities, dry_run=True)
        assert result["albert"]["type"] == "unknown"

    @patch("knowledge_cleanup.ollama_generate")
    def test_handles_llm_failure(self, mock_ollama):
        # LLM errors must degrade gracefully, leaving types untouched.
        mock_ollama.side_effect = Exception("timeout")
        entities = {"albert": {"type": "unknown"}}
        result = kc.classify_unknowns(entities, dry_run=False)
        assert result["albert"]["type"] == "unknown"  # unchanged

View file

@ -0,0 +1,61 @@
"""Tests for knowledge_extractor.py (darkplex-core root) — Smart Extractor."""
import sys
from pathlib import Path
from unittest.mock import patch
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
from knowledge_extractor import importance_heuristic, parse_since
class TestImportanceHeuristic:
    """Heuristic importance scoring of raw text snippets."""

    def test_empty(self):
        # Empty and None inputs are scored 0.0 rather than raising.
        assert importance_heuristic("") == 0.0
        assert importance_heuristic(None) == 0.0

    def test_short_text(self):
        assert 0 < importance_heuristic("Hello world") <= 1.0

    def test_long_text_boosted(self):
        # Longer text should outscore a trivially short one.
        short_score = importance_heuristic("Hello")
        long_score = importance_heuristic("x " * 300)
        assert long_score > short_score

    def test_heartbeat_penalized(self):
        heartbeat = "HEARTBEAT_OK system running fine no issues detected at all"
        assert importance_heuristic(heartbeat) < 0.3

    def test_business_boosted(self):
        business = "Meeting about the project deadline and contract with the client partnership"
        assert importance_heuristic(business) > 0.4

    def test_capitalized_names_boost(self):
        text = "Albert discussed with Thomas, Sarah, Michael, Peter, Franz, and Maria about the Company"
        assert importance_heuristic(text) > 0.4

    def test_clamped(self):
        # Even extreme texts should be 0-1
        clamped = importance_heuristic("cron: heartbeat HEARTBEAT_OK health check no critical")
        assert 0 <= clamped <= 1.0
class TestParseSince:
    """parse_since accepts <N><h|d|m> strings and rejects anything else."""

    def test_hours(self):
        result = parse_since("6h")
        assert result is not None
        assert result > 0

    def test_days(self):
        assert parse_since("1d") is not None

    def test_minutes(self):
        assert parse_since("30m") is not None

    def test_invalid(self):
        for bad in ("abc", ""):
            assert parse_since(bad) is None

147
tests/test_llm_extractor.py Normal file
View file

@ -0,0 +1,147 @@
"""Tests for intelligence/llm_extractor.py — LLM-Powered Entity Extractor."""
import json
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
from llm_extractor import (
_parse_json_response,
_normalize_entities,
extract_entities_llm,
extract_entities_llm_batch,
is_available,
VALID_TYPES,
)
class TestParseJsonResponse:
    """_parse_json_response must tolerate empty, fenced, and junk input."""

    def test_empty(self):
        for blank in ("", None):
            assert _parse_json_response(blank) == {}

    def test_plain_json(self):
        parsed = _parse_json_response('{"albert": {"type": "person", "context": "CEO"}}')
        assert "albert" in parsed

    def test_markdown_fenced(self):
        fenced = '```json\n{"albert": {"type": "person", "context": "CEO"}}\n```'
        assert "albert" in _parse_json_response(fenced)

    def test_no_json(self):
        assert _parse_json_response("no json here") == {}

    def test_nested_braces(self):
        parsed = _parse_json_response('{"a": {"type": "person", "context": "test"}}')
        assert "a" in parsed
class TestNormalizeEntities:
    """Normalisation of raw LLM output into canonical entity records."""

    def test_valid_entity(self):
        # Names are lower-cased and tagged with match="llm".
        raw = {"Albert": {"type": "person", "context": "CEO of company"}}
        result = _normalize_entities(raw)
        assert "albert" in result
        assert result["albert"]["type"] == "person"
        assert result["albert"]["match"] == "llm"

    def test_type_alias(self):
        # "language" is aliased onto the canonical "technology" type.
        raw = {"python": {"type": "language", "context": "programming"}}
        result = _normalize_entities(raw)
        assert result["python"]["type"] == "technology"

    def test_unknown_type_becomes_concept(self):
        # Unrecognised types fall back to "concept".
        raw = {"thing": {"type": "xyzzy", "context": "unknown"}}
        result = _normalize_entities(raw)
        assert result["thing"]["type"] == "concept"

    def test_filters_short_names(self):
        # Single-character names are dropped.
        raw = {"x": {"type": "person", "context": "test"}}
        result = _normalize_entities(raw)
        assert len(result) == 0

    def test_filters_long_names(self):
        # Names longer than 80 characters are dropped.
        raw = {"a" * 81: {"type": "person", "context": "test"}}
        result = _normalize_entities(raw)
        assert len(result) == 0

    def test_non_dict_info_skipped(self):
        raw = {"test": "not a dict"}
        result = _normalize_entities(raw)
        assert len(result) == 0

    def test_context_truncated(self):
        # Context strings are capped at 100 characters.
        raw = {"test": {"type": "person", "context": "x" * 200}}
        result = _normalize_entities(raw)
        assert len(result["test"]["context"]) <= 100

    def test_underscores_to_hyphens(self):
        raw = {"mondo_gate": {"type": "company", "context": "test"}}
        result = _normalize_entities(raw)
        assert "mondo-gate" in result
class TestExtractEntitiesLlm:
    """Single-text extraction: short-circuiting, fallback, truncation."""

    @patch("llm_extractor._call_ollama")
    def test_empty_text(self, mock_ollama):
        # Empty/too-short input never reaches the LLM.
        assert extract_entities_llm("") == {}
        assert extract_entities_llm("short") == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_ollama_unavailable(self, mock_ollama):
        # A None response from ollama makes the extractor return None.
        mock_ollama.return_value = None
        result = extract_entities_llm("This is a test about Albert and Mondo Gate AG")
        assert result is None  # signals fallback

    @patch("llm_extractor._call_ollama")
    def test_successful_extraction(self, mock_ollama):
        mock_ollama.return_value = '{"albert": {"type": "person", "context": "mentioned"}}'
        result = extract_entities_llm("Albert discussed the project with the team members today")
        assert "albert" in result
        assert result["albert"]["type"] == "person"

    @patch("llm_extractor._call_ollama")
    def test_truncates_long_text(self, mock_ollama):
        mock_ollama.return_value = "{}"
        extract_entities_llm("x" * 3000)
        # First positional arg of the ollama call is the full prompt.
        call_args = mock_ollama.call_args[0][0]
        # The text in the prompt should be truncated
        assert len(call_args) < 3000 + 500  # prompt overhead
class TestExtractEntitiesLlmBatch:
    """Batch extraction: filtering of trivial inputs, combined results."""

    @patch("llm_extractor._call_ollama")
    def test_empty_list(self, mock_ollama):
        assert extract_entities_llm_batch([]) == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_filters_short_texts(self, mock_ollama):
        # All-trivial inputs are filtered out before any LLM call.
        mock_ollama.return_value = "{}"
        result = extract_entities_llm_batch(["hi", "yo", ""])
        assert result == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_batch_extraction(self, mock_ollama):
        mock_ollama.return_value = '{"python": {"type": "technology", "context": "language"}}'
        result = extract_entities_llm_batch(["Python is a great programming language for data science"])
        assert "python" in result
class TestIsAvailable:
    """Availability probe of the local ollama HTTP endpoint."""

    @patch("llm_extractor.urllib.request.urlopen")
    def test_available(self, mock_urlopen):
        # Simulate a context-manager HTTP response with status 200.
        mock_resp = MagicMock()
        mock_resp.status = 200
        mock_resp.__enter__ = MagicMock(return_value=mock_resp)
        mock_resp.__exit__ = MagicMock(return_value=False)
        mock_urlopen.return_value = mock_resp
        assert is_available() is True

    @patch("llm_extractor.urllib.request.urlopen")
    def test_unavailable(self, mock_urlopen):
        # Any connection error maps to False, not an exception.
        mock_urlopen.side_effect = Exception("connection refused")
        assert is_available() is False

119
tests/test_loop.py Normal file
View file

@ -0,0 +1,119 @@
"""Tests for intelligence/loop.py — Darkplex Loop state machine and helpers."""
import json
import sys
import time
from datetime import datetime, timezone, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
import loop as darkplex_loop
class TestImportance:
    """Heuristic importance scoring in loop._importance."""

    def test_empty(self):
        assert darkplex_loop._importance("") == 0.0

    def test_heartbeat_low(self):
        heartbeat = "HEARTBEAT_OK all systems nominal"
        assert darkplex_loop._importance(heartbeat) < 0.2

    def test_business_content_high(self):
        business = "Meeting about the project deadline and budget milestone"
        assert darkplex_loop._importance(business) > 0.4

    def test_clamped(self):
        samples = ["", "x" * 1000, "meeting project company contract decision strategy"]
        for sample in samples:
            score = darkplex_loop._importance(sample)
            assert 0.0 <= score <= 1.0
class TestLoopState:
    """LoopState lifecycle: persistence, failure escalation, perf tracking.

    Each test patches STATE_FILE onto a tmp_path so no real state is touched.
    """

    def test_init(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            assert state.status == "INIT"
            assert state.cycle_count == 0

    def test_save_and_load(self, tmp_path):
        # A second LoopState over the same file must see the saved values.
        sf = tmp_path / "state.json"
        with patch.object(darkplex_loop, 'STATE_FILE', sf):
            state = darkplex_loop.LoopState()
            state.status = "RUNNING"
            state.cycle_count = 5
            state.save()
            state2 = darkplex_loop.LoopState()
            assert state2.status == "RUNNING"
            assert state2.cycle_count == 5

    def test_record_success(self, tmp_path):
        # Success resets the failure streak and bumps the cycle count.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_success({"test": "ok"})
            assert state.status == "RUNNING"
            assert state.consecutive_failures == 0
            assert state.cycle_count == 1

    def test_record_failure_degraded(self, tmp_path):
        # A single failure moves the loop into DEGRADED.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_failure("ingest", "timeout")
            assert state.status == "DEGRADED"
            assert state.consecutive_failures == 1

    def test_record_failure_emergency(self, tmp_path):
        # Three consecutive failures escalate to EMERGENCY.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            for i in range(3):
                state.record_failure("ingest", "timeout")
            assert state.status == "EMERGENCY"

    def test_can_alert(self, tmp_path):
        # mark_alerted() suppresses immediate re-alerting.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            assert state.can_alert()
            state.mark_alerted()
            assert not state.can_alert()

    def test_record_perf(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_perf({"total_ms": 1000, "ingest_ms": 200})
            assert state.perf["total_ms"] == 1000
            assert len(state.perf_history) == 1

    def test_perf_averages(self, tmp_path):
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            state.record_perf({"total_ms": 1000})
            state.record_perf({"total_ms": 2000})
            avgs = state.perf_averages()
            assert avgs["total_ms"] == 1500

    def test_perf_history_capped(self, tmp_path):
        # History is bounded at 10 entries.
        with patch.object(darkplex_loop, 'STATE_FILE', tmp_path / "state.json"):
            state = darkplex_loop.LoopState()
            for i in range(15):
                state.record_perf({"total_ms": i * 100})
            assert len(state.perf_history) == 10
class TestCheckNewEvents:
    """check_new_events(): pending count from the nats CLI, -1 on failure."""

    @patch("loop.subprocess.run")
    def test_returns_pending(self, mock_run):
        mock_run.return_value = MagicMock(
            returncode=0, stdout=json.dumps({"num_pending": 42})
        )
        assert darkplex_loop.check_new_events() == 42

    @patch("loop.subprocess.run")
    def test_returns_negative_on_failure(self, mock_run):
        # Non-zero exit maps to the -1 sentinel.
        mock_run.return_value = MagicMock(returncode=1, stdout="")
        assert darkplex_loop.check_new_events() == -1

    @patch("loop.subprocess.run")
    def test_handles_exception(self, mock_run):
        # A missing nats binary must not propagate the exception.
        mock_run.side_effect = Exception("nats not found")
        assert darkplex_loop.check_new_events() == -1

View file

@ -0,0 +1,72 @@
"""Tests for intelligence/shared_memory module."""
import json
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from cortex.intelligence.shared_memory import ALLOWED_AGENTS, Insight, SharedMemory
class TestInsight:
    """Construction and (de)serialisation of the Insight record."""

    def test_creation(self):
        insight = Insight(agent="claudia", topic="test", content="hello")
        assert insight.agent == "claudia"
        # timestamp is filled in automatically on construction
        assert insight.timestamp

    def test_to_json(self):
        insight = Insight(agent="claudia", topic="test", content="hello")
        payload = json.loads(insight.to_json())
        assert payload["agent"] == "claudia"
        assert payload["topic"] == "test"

    def test_from_json(self):
        original = Insight(agent="claudia", topic="test", content="hello", confidence=0.9)
        restored = Insight.from_json(original.to_json())
        assert restored.agent == original.agent
        assert restored.confidence == 0.9

    def test_default_confidence(self):
        assert Insight(agent="claudia", topic="t", content="c").confidence == 0.8

    def test_tags(self):
        tagged = Insight(agent="claudia", topic="t", content="c", tags=["a", "b"])
        assert len(tagged.tags) == 2
class TestSharedMemory:
    """Agent whitelist enforcement and not-connected guards on SharedMemory.

    Fix: the not-connected tests previously drove the coroutines with
    ``asyncio.get_event_loop().run_until_complete(...)``; calling
    get_event_loop() without a running loop is deprecated since Python 3.10
    and slated for removal. ``asyncio.run`` is the supported equivalent and
    still lets pytest.raises observe the RuntimeError from the coroutine.
    """

    def test_allowed_agent(self):
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        assert sm.agent_name == agent

    def test_disallowed_agent(self):
        # Unknown agents are rejected at construction time.
        with pytest.raises(ValueError, match="not allowed"):
            SharedMemory(agent_name="hacker_bot")

    def test_not_connected_publish(self):
        import asyncio
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        with pytest.raises(RuntimeError, match="Not connected"):
            asyncio.run(
                sm.publish(Insight(agent=agent, topic="t", content="c"))
            )

    def test_not_connected_subscribe(self):
        import asyncio
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        with pytest.raises(RuntimeError, match="Not connected"):
            asyncio.run(
                sm.subscribe("t", lambda x: None)
            )
class TestAllowedAgents:
    """Sanity checks on the ALLOWED_AGENTS whitelist."""

    def test_default_agents(self):
        # The whitelist is non-empty and always includes claudia.
        assert len(ALLOWED_AGENTS) >= 1
        assert "claudia" in ALLOWED_AGENTS

77
tests/test_temporal.py Normal file
View file

@ -0,0 +1,77 @@
"""Tests for intelligence/temporal.py — Temporal Context API."""
import sys
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
from temporal import TemporalEntry, TemporalQuery, TemporalContext
class TestTemporalEntry:
    """Field handling of the TemporalEntry record."""

    def test_creation(self):
        entry = TemporalEntry(
            timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc),
            source="nats",
            topic="ssl-cert",
            content="SSL cert expiring",
        )
        assert entry.source == "nats"
        # relevance defaults to 0.0 until a query scores the entry
        assert entry.relevance_score == 0.0

    def test_metadata(self):
        entry = TemporalEntry(
            timestamp=datetime.now(timezone.utc),
            source="chromadb",
            topic="test",
            content="test",
            metadata={"key": "value"},
            relevance_score=0.95,
        )
        assert entry.metadata["key"] == "value"
        assert entry.relevance_score == 0.95
class TestTemporalQuery:
    """Defaults and overrides for TemporalQuery."""

    def test_defaults(self):
        query = TemporalQuery(topic="test")
        assert query.limit == 50
        # Both backends are queried by default.
        for source in ("nats", "chromadb"):
            assert source in query.sources

    def test_custom(self):
        query = TemporalQuery(
            topic="ssl",
            start_time=datetime(2026, 1, 1, tzinfo=timezone.utc),
            limit=10,
            sources=["nats"],
        )
        assert query.limit == 10
        assert len(query.sources) == 1
class TestTemporalContext:
    """Construction defaults and graceful no-connection behaviour."""

    def test_init_defaults(self):
        # Both backend URLs default to localhost endpoints.
        ctx = TemporalContext()
        assert "localhost" in ctx.nats_url
        assert "localhost" in ctx.chromadb_url

    def test_init_custom(self):
        ctx = TemporalContext(nats_url="nats://custom:4222", chromadb_url="http://custom:8000")
        assert ctx.nats_url == "nats://custom:4222"

    @pytest.mark.asyncio
    async def test_query_no_connections(self):
        ctx = TemporalContext()
        # No connections established, should return empty
        result = await ctx.query(TemporalQuery(topic="test"))
        assert result == []

    @pytest.mark.asyncio
    async def test_close_no_connection(self):
        ctx = TemporalContext()
        await ctx.close()  # Should not raise