Merge darkplex-core into cortex — unified intelligence layer v0.2.0
Some checks failed
Tests / test (push) Failing after 2s
Some checks failed
Tests / test (push) Failing after 2s
- Merged all unique darkplex-core modules into cortex: - intelligence/ subfolder (anticipator, collective, shared_memory, knowledge_cleanup, temporal, llm_extractor, loop) - governance/ subfolder (policy engine, risk scorer, evidence, enforcer, report generator) - entity_manager.py, knowledge_extractor.py - Fixed bare 'from intelligence.' imports to 'from cortex.intelligence.' - Added 'darkplex' CLI alias alongside 'cortex' - Package renamed to darkplex-core v0.2.0 - 405 tests passing (was 234) - 14 new test files covering all merged modules
This commit is contained in:
parent
fda607c204
commit
fd7d75c0ed
41 changed files with 6368 additions and 3 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -5,3 +5,4 @@ dist/
|
|||
build/
|
||||
.eggs/
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
|
|
|
|||
371
cortex/entity_manager.py
Executable file
371
cortex/entity_manager.py
Executable file
|
|
@ -0,0 +1,371 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Entity Manager — File-based knowledge graph for entity extraction and relationship mapping.
|
||||
Part of Level 4.4 AGI Roadmap.
|
||||
|
||||
Usage:
|
||||
entity-manager.py bootstrap — Bootstrap from life/areas/
|
||||
entity-manager.py extract "text" — Extract entities from text
|
||||
entity-manager.py relate "A" "B" [type] — Create/update relationship
|
||||
entity-manager.py query "entity" — Query relationships for entity
|
||||
entity-manager.py graph — Output relationship summary
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
|
||||
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
|
||||
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
|
||||
LIFE_AREAS = Path.home() / "life" / "areas"
|
||||
|
||||
# Common words to skip during entity extraction
|
||||
STOP_WORDS = {
|
||||
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
|
||||
"have", "has", "had", "do", "does", "did", "will", "would", "could",
|
||||
"should", "may", "might", "shall", "can", "need", "must", "i", "you",
|
||||
"he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
|
||||
"my", "your", "his", "its", "our", "their", "this", "that", "these",
|
||||
"those", "what", "which", "who", "whom", "where", "when", "why", "how",
|
||||
"all", "each", "every", "both", "few", "more", "most", "other", "some",
|
||||
"such", "no", "nor", "not", "only", "own", "same", "so", "than", "too",
|
||||
"very", "just", "because", "as", "until", "while", "of", "at", "by",
|
||||
"for", "with", "about", "against", "between", "through", "during",
|
||||
"before", "after", "above", "below", "to", "from", "up", "down", "in",
|
||||
"out", "on", "off", "over", "under", "again", "further", "then", "once",
|
||||
"here", "there", "and", "but", "or", "if", "then", "else", "also",
|
||||
"system", "cron", "heartbeat", "ok", "error", "warning", "info",
|
||||
"message", "session", "agent", "main", "matrix", "telegram",
|
||||
"read", "write", "check", "run", "send", "get", "set", "let", "see",
|
||||
"know", "think", "want", "like", "make", "take", "come", "go", "say",
|
||||
"tell", "ask", "try", "use", "find", "give", "new", "good", "first",
|
||||
"last", "long", "great", "little", "right", "big", "high", "old",
|
||||
"different", "small", "large", "next", "early", "young", "important",
|
||||
"public", "bad", "sure", "sure", "yes", "no", "maybe", "ok", "okay",
|
||||
"thanks", "thank", "please", "hello", "hi", "hey", "bye", "well",
|
||||
"now", "today", "tomorrow", "yesterday", "monday", "tuesday",
|
||||
"wednesday", "thursday", "friday", "saturday", "sunday",
|
||||
"january", "february", "march", "april", "may", "june", "july",
|
||||
"august", "september", "october", "november", "december",
|
||||
"still", "already", "currently", "actually", "really", "right",
|
||||
"look", "keep", "going", "based", "done", "work", "working",
|
||||
}
|
||||
|
||||
|
||||
def normalize(name):
    """Return the canonical form of an entity name: trimmed, lowercased, underscores → hyphens."""
    canonical = name.strip().lower()
    return canonical.replace("_", "-")
|
||||
|
||||
|
||||
def load_json(path):
    """Read *path* as JSON and return the parsed value.

    Missing files and malformed JSON both fall back to an empty dict so
    callers can treat the stores as always-present mappings.
    """
    try:
        with open(path) as handle:
            return json.load(handle)
    except FileNotFoundError:
        return {}
    except json.JSONDecodeError:
        return {}
|
||||
|
||||
|
||||
def save_json(path, data):
    """Write *data* to *path* as pretty-printed JSON, creating parent dirs as needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as handle:
        # ensure_ascii=False keeps non-ASCII entity names readable on disk
        json.dump(data, handle, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
# Singular entity type for each life/areas/ directory name.
# BUG FIX: the previous code used category.rstrip("s"), which strips a
# character *set*, not a suffix — "companies".rstrip("s") == "companie",
# so company entities got the bogus type "companie" (cmd_bootstrap uses
# the correct "company").
_CATEGORY_TYPES = {"people": "person", "companies": "company", "projects": "project"}


def load_known_entities():
    """Load known entity names from life/areas/ and entities.json.

    Returns:
        dict mapping normalized entity name -> info dict with at least
        "type" and "source". Entries discovered under life/areas/ take
        precedence over entries from entities.json.
    """
    known = {}
    # From life/areas: each subdirectory is one entity.
    for category, etype in _CATEGORY_TYPES.items():
        area_dir = LIFE_AREAS / category
        if not area_dir.exists():
            continue
        for entry in area_dir.iterdir():
            if entry.is_dir():
                name = normalize(entry.name)
                known[name] = {"type": etype, "source": f"life/areas/{category}"}
    # From entities.json — only names not already seen above.
    entities = load_json(ENTITIES_FILE)
    for name, info in entities.items():
        known.setdefault(name, info)
    return known
|
||||
|
||||
|
||||
def extract_entities(text, known=None):
    """Extract entities from text using heuristics and known entity matching.

    Args:
        text: Free-form text to scan.
        known: Optional mapping of normalized name -> info dict; defaults
            to load_known_entities(). Pass {} to disable known matching.

    Returns:
        dict mapping normalized entity name -> {"type": ..., "match": ...},
        where "match" records which heuristic found the name. Heuristics
        run in priority order; the first one to claim a name wins.
    """
    if known is None:
        known = load_known_entities()

    found = {}
    text_lower = text.lower()

    # 1. Match known entities. NOTE(review): this is a plain substring
    #    match, so short names embedded in longer words can false-positive;
    #    the len > 2 guard only limits the worst cases.
    for name, info in known.items():
        # Check for name or slug in text
        variants = [name, name.replace("-", " "), name.replace("-", "")]
        for v in variants:
            if v in text_lower and len(v) > 2:
                found[name] = {"type": info.get("type", "unknown"), "match": "known"}
                break

    # 2. Extract @mentions — assumed to be people
    for m in re.finditer(r"@(\w+)", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS and len(name) > 2:
            found[name] = {"type": "person", "match": "mention"}

    # 3. Extract capitalized multi-word names (likely proper nouns);
    #    the character class includes German lowercase umlauts/ß
    for m in re.finditer(r"\b([A-Z][a-zäöüß]+(?:\s+[A-Z][a-zäöüß]+)+)\b", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS and len(name) > 3:
            # Heuristic: if 2-3 words, likely person; if more, likely org/topic
            words = name.split()
            etype = "person" if len(words) <= 3 else "topic"
            found[name] = {"type": etype, "match": "capitalized"}

    # 4. Extract standalone capitalized words (potential entities)
    for m in re.finditer(r"\b([A-Z][a-zäöüß]{2,})\b", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS:
            found[name] = {"type": "unknown", "match": "capitalized_single"}

    # 5. Extract ALL-CAPS acronyms (likely companies/products); common
    #    technical abbreviations are excluded on top of STOP_WORDS
    for m in re.finditer(r"\b([A-Z]{2,6})\b", text):
        name = normalize(m.group(1))
        if name not in found and name not in STOP_WORDS and name not in {
            "ok", "am", "pm", "gmt", "utc", "url", "api", "cli", "ssh", "dns",
            "http", "https", "json", "html", "css", "js", "ts", "py", "md",
            "id", "ui", "ux", "io", "os", "ip", "gb", "mb", "kb", "tb",
        }:
            found[name] = {"type": "organization", "match": "acronym"}

    return found
|
||||
|
||||
|
||||
def cmd_bootstrap():
    """Bootstrap entities from life/areas/.

    Scans life/areas/people and life/areas/companies, registers each
    subdirectory as an entity in entities.json (never overwriting names
    already present), and pulls email/context from the entity's
    summary.md when available. Prints a one-line summary when done.
    """
    entities = load_json(ENTITIES_FILE)
    relationships = load_json(RELATIONSHIPS_FILE)
    count = 0

    for category in ["people", "companies"]:
        area_dir = LIFE_AREAS / category
        if not area_dir.exists():
            continue
        etype = "person" if category == "people" else "company"
        for entry in sorted(area_dir.iterdir()):
            if not entry.is_dir():
                continue
            name = normalize(entry.name)
            if name in entities:
                continue  # existing registry entries are never overwritten

            info = {"type": etype, "source": f"life/areas/{category}", "bootstrapped": True}

            # Try to extract extra info from summary.md (first 2000 chars only)
            summary_path = entry / "summary.md"
            if summary_path.exists():
                try:
                    summary = summary_path.read_text(errors="replace")[:2000]
                    # Extract email (markdown "**Email:** someone@host")
                    em = re.search(r"\*\*Email:\*\*\s*(\S+)", summary)
                    if em:
                        info["email"] = em.group(1)
                    # Extract context ("**Kontext:** ..." — summaries are German-language)
                    ctx = re.search(r"\*\*Kontext:\*\*\s*(.+)", summary)
                    if ctx:
                        info["context"] = ctx.group(1).strip()
                except Exception:
                    # summary.md metadata is best-effort; ignore read/parse failures
                    pass

            entities[name] = info
            count += 1

    save_json(ENTITIES_FILE, entities)
    save_json(RELATIONSHIPS_FILE, relationships)
    print(f"Bootstrapped {count} new entities. Total: {len(entities)}")
|
||||
|
||||
|
||||
def cmd_extract(text):
    """Extract and display entities from text.

    Prints one line per found entity and registers any previously unknown
    entity in entities.json with a first_seen timestamp.
    """
    known = load_known_entities()
    found = extract_entities(text, known)

    if not found:
        print("No entities found.")
        return

    # Update entities.json with new discoveries
    entities = load_json(ENTITIES_FILE)
    new_count = 0
    for name, info in found.items():
        if name not in entities:
            entities[name] = {
                "type": info["type"],
                "source": "extraction",
                "first_seen": time.strftime("%Y-%m-%dT%H:%M:%S"),  # local time, no tz
            }
            new_count += 1
        print(f" [{info['type']:12s}] {name} ({info['match']})")

    if new_count:
        save_json(ENTITIES_FILE, entities)
        print(f"\n{new_count} new entities added to registry.")
|
||||
|
||||
|
||||
def cmd_relate(entity_a, entity_b, rel_type="related"):
    """Create or update a relationship between two entities.

    Relationships are undirected: the storage key is the sorted name
    pair, so relate(A, B) and relate(B, A) hit the same record. Both
    endpoints are added to the entity registry if missing.
    """
    a = normalize(entity_a)
    b = normalize(entity_b)
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)

    first, second = sorted([a, b])
    key = f"{first}::{second}"
    ts = time.strftime("%Y-%m-%dT%H:%M:%S")

    existing = relationships.get(key)
    if existing is not None:
        existing["count"] = existing.get("count", 1) + 1
        existing["last_seen"] = ts
        # Record each non-default relationship type at most once.
        if rel_type != "related" and rel_type not in existing.get("types", []):
            existing.setdefault("types", []).append(rel_type)
        print(f"Updated: {a} <-> {b} (seen {existing['count']}x)")
    else:
        relationships[key] = {
            "a": a, "b": b,
            "types": [rel_type],
            "count": 1,
            "first_seen": ts,
            "last_seen": ts,
        }
        print(f"Created: {a} <-> {b} ({rel_type})")

    # Ensure both endpoints exist in the registry.
    for name in (a, b):
        if name not in entities:
            entities[name] = {"type": "unknown", "source": "relationship", "first_seen": ts}

    save_json(RELATIONSHIPS_FILE, relationships)
    save_json(ENTITIES_FILE, entities)
|
||||
|
||||
|
||||
def cmd_query(entity_name):
    """Query all relationships for an entity.

    Prints the registry record (type, email, context, source), every
    relationship touching the entity sorted by how often it was seen,
    and the first 500 chars of a matching life/areas/ summary.md if
    one exists.
    """
    name = normalize(entity_name)
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)

    # Entity info
    if name in entities:
        info = entities[name]
        print(f"Entity: {name}")
        print(f" Type: {info.get('type', 'unknown')}")
        if info.get("email"):
            print(f" Email: {info['email']}")
        if info.get("context"):
            print(f" Context: {info['context']}")
        if info.get("source"):
            print(f" Source: {info['source']}")
    else:
        print(f"Entity '{name}' not found in registry.")

    # Relationships (undirected — match either endpoint)
    rels = []
    for key, rel in relationships.items():
        if rel["a"] == name or rel["b"] == name:
            other = rel["b"] if rel["a"] == name else rel["a"]
            rels.append((other, rel))

    if rels:
        print(f"\nRelationships ({len(rels)}):")
        # Most frequently seen relationships first
        for other, rel in sorted(rels, key=lambda x: -x[1].get("count", 1)):
            types = ", ".join(rel.get("types", ["related"]))
            print(f" {name} <-> {other} [{types}] (seen {rel.get('count', 1)}x)")
    else:
        print("\nNo relationships found.")

    # Check life/areas/ for a matching folder (spaces become hyphens)
    for category in ["people", "companies", "projects"]:
        area_path = LIFE_AREAS / category / name.replace(" ", "-")
        if area_path.exists():
            summary_path = area_path / "summary.md"
            if summary_path.exists():
                print(f"\nLife area ({category}): {area_path}")
                content = summary_path.read_text(errors="replace")[:500]
                print(content)
|
||||
|
||||
|
||||
def cmd_graph():
    """Output a simple relationship graph summary.

    Shows total counts, the 20 most-connected entities, and the 10 most
    recently updated relationships.
    """
    relationships = load_json(RELATIONSHIPS_FILE)
    entities = load_json(ENTITIES_FILE)

    if not relationships:
        print("No relationships in knowledge graph.")
        return

    # Count connections per entity (each relationship counts for both ends)
    connections = {}
    for key, rel in relationships.items():
        for name in [rel["a"], rel["b"]]:
            connections[name] = connections.get(name, 0) + 1

    # Sort by connections, most connected first
    top = sorted(connections.items(), key=lambda x: -x[1])

    print(f"Knowledge Graph: {len(entities)} entities, {len(relationships)} relationships\n")
    print("Top connected entities:")
    for name, count in top[:20]:
        etype = entities.get(name, {}).get("type", "?")
        print(f" {name} ({etype}): {count} connections")

    print(f"\nRecent relationships:")
    # last_seen is an ISO-like timestamp string, so lexical sort == chronological
    recent = sorted(relationships.values(), key=lambda r: r.get("last_seen", ""), reverse=True)[:10]
    for rel in recent:
        types = ", ".join(rel.get("types", ["related"]))
        print(f" {rel['a']} <-> {rel['b']} [{types}]")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: dispatch to the subcommand named in argv[1].

    Unknown or missing commands print the module docstring (usage) and
    exit with status 1.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    cmd = sys.argv[1]

    if cmd == "bootstrap":
        cmd_bootstrap()
    elif cmd == "extract":
        if len(sys.argv) < 3:
            print("Usage: entity-manager.py extract \"text\"")
            sys.exit(1)
        # Join all remaining args so unquoted multi-word text still works
        cmd_extract(" ".join(sys.argv[2:]))
    elif cmd == "relate":
        if len(sys.argv) < 4:
            print("Usage: entity-manager.py relate \"entity_a\" \"entity_b\" [type]")
            sys.exit(1)
        rel_type = sys.argv[4] if len(sys.argv) > 4 else "related"
        cmd_relate(sys.argv[2], sys.argv[3], rel_type)
    elif cmd == "query":
        if len(sys.argv) < 3:
            print("Usage: entity-manager.py query \"entity\"")
            sys.exit(1)
        cmd_query(" ".join(sys.argv[2:]))
    elif cmd == "graph":
        cmd_graph()
    else:
        print(f"Unknown command: {cmd}")
        print(__doc__)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
cortex/governance/__init__.py
Normal file
0
cortex/governance/__init__.py
Normal file
228
cortex/governance/cli.py
Normal file
228
cortex/governance/cli.py
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
"""Governance CLI — policy evaluation, risk scoring, evidence & reporting.
|
||||
|
||||
Usage:
|
||||
darkplex governance evaluate --agent <name> --action <action> [--data-type <type>] [--target <target>] [--role <role>]
|
||||
darkplex governance risk --agent <name> --action <action> [--data-type <type>] [--target <target>] [--role <role>]
|
||||
darkplex governance evidence [--agent <name>] [--verdict <verdict>] [--control <id>] [--json]
|
||||
darkplex governance report [--agent <name>] [--json] [--output <path>]
|
||||
darkplex governance policies [--reload]
|
||||
darkplex governance status
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Default paths
|
||||
DEFAULT_POLICIES_DIR = os.environ.get(
|
||||
"GOVERNANCE_POLICIES_DIR",
|
||||
str(Path(__file__).parent / "policies"),
|
||||
)
|
||||
DEFAULT_CONTROLS_MAPPING = os.environ.get(
|
||||
"GOVERNANCE_CONTROLS_MAPPING",
|
||||
str(Path(__file__).parent / "controls" / "iso27001-mapping.yaml"),
|
||||
)
|
||||
|
||||
|
||||
def _build_context(args: argparse.Namespace) -> dict:
|
||||
"""Build an evaluation context from CLI args."""
|
||||
ctx = {}
|
||||
if args.agent:
|
||||
ctx["agent"] = args.agent
|
||||
if args.action:
|
||||
ctx["action"] = args.action
|
||||
if args.data_type:
|
||||
ctx["data_type"] = args.data_type
|
||||
if args.target:
|
||||
ctx["target"] = args.target
|
||||
if args.role:
|
||||
ctx["agent_role"] = args.role
|
||||
return ctx
|
||||
|
||||
|
||||
def _get_engine():
    """Return a PolicyEngine loading policies from DEFAULT_POLICIES_DIR."""
    # Fully-qualified package import: this module lives in cortex/governance/,
    # so the bare "governance" package only resolves if cortex/ itself is on
    # sys.path (same class of bug as the bare "intelligence." imports fixed
    # elsewhere in this merge).
    from cortex.governance.policy import PolicyEngine

    return PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR)
|
||||
|
||||
|
||||
def _get_scorer():
    """Return a RiskScorer with default weights."""
    # Fully-qualified package import — bare "governance" only resolves when
    # cortex/ itself is on sys.path.
    from cortex.governance.risk_scorer import RiskScorer

    return RiskScorer()
|
||||
|
||||
|
||||
def _get_enforcer():
    """Assemble a fully wired Enforcer (policy engine + risk scorer + evidence)."""
    # Fully-qualified package imports — bare "governance" only resolves when
    # cortex/ itself is on sys.path.
    from cortex.governance.enforcer import Enforcer
    from cortex.governance.evidence import ControlMapping, EvidenceCollector
    from cortex.governance.policy import PolicyEngine
    from cortex.governance.risk_scorer import RiskScorer

    return Enforcer(
        policy_engine=PolicyEngine(policies_dir=DEFAULT_POLICIES_DIR),
        risk_scorer=RiskScorer(),
        evidence_collector=EvidenceCollector(
            control_mapping=ControlMapping(DEFAULT_CONTROLS_MAPPING)
        ),
    )
|
||||
|
||||
|
||||
def cmd_evaluate(args: argparse.Namespace) -> None:
    """Full governance evaluation: policy + risk + evidence.

    Builds a context from the CLI args, runs it through the Enforcer, and
    prints the decision as JSON (--json) or a human-readable summary with
    a per-factor risk breakdown.
    """
    enforcer = _get_enforcer()
    ctx = _build_context(args)
    decision = enforcer.evaluate(ctx)

    if args.json:
        print(json.dumps({
            "verdict": decision.verdict,
            "reason": decision.reason,
            "risk_score": decision.risk.value,
            "risk_level": decision.risk.level,
            "risk_factors": decision.risk.factors,
            "policy_result": decision.policy_result,
        }, indent=2))
    else:
        icon = {"approve": "✅", "deny": "❌", "escalate": "⚠️"}.get(decision.verdict, "❓")
        print(f"{icon} Verdict: {decision.verdict.upper()}")
        print(f" Reason: {decision.reason}")
        print(f" Risk: {decision.risk.value}/10 ({decision.risk.level})")
        # Each factor detail is expected to be a dict with "value"/"score";
        # .get() falls back to printing the raw detail otherwise.
        for factor, detail in decision.risk.factors.items():
            print(f" • {factor}: {detail.get('value', detail)} (+{detail.get('score', 0)})")
|
||||
|
||||
|
||||
def cmd_risk(args: argparse.Namespace) -> None:
    """Risk scoring only.

    Scores the action context without policy evaluation or evidence
    recording; prints JSON (--json) or a human-readable breakdown.
    """
    scorer = _get_scorer()
    ctx = _build_context(args)
    result = scorer.score(ctx)

    if args.json:
        print(json.dumps({
            "risk_score": result.value,
            "risk_level": result.level,
            "acceptable": result.is_acceptable,
            "factors": result.factors,
        }, indent=2))
    else:
        icon = "🟢" if result.is_acceptable else "🔴"
        print(f"{icon} Risk Score: {result.value}/10 ({result.level})")
        print(f" Acceptable: {'yes' if result.is_acceptable else 'NO'}")
        # Each factor detail is expected to be a dict with "value"/"score"
        for factor, detail in result.factors.items():
            print(f" • {factor}: {detail.get('value', detail)} (+{detail.get('score', 0)})")
|
||||
|
||||
|
||||
def cmd_policies(args: argparse.Namespace) -> None:
    """List loaded policies with their rules.

    NOTE(review): the --reload flag is accepted by the parser but unused
    here — a fresh engine (and thus a fresh policy load) happens on every
    invocation anyway.
    """
    engine = _get_engine()
    if not engine.policies:
        print("No policies loaded.")
        return
    for policy in engine.policies:
        print(f"📋 {policy.name} (v{policy.version})")
        print(f" {policy.description}")
        print(f" Rules: {len(policy.rules)}")
        for rule in policy.rules:
            print(f" • {rule.name} → {rule.effect} (priority: {rule.priority})")
        print()
|
||||
|
||||
|
||||
def cmd_status(args: argparse.Namespace) -> None:
    """Show governance system status.

    Reports the configured paths, how many policies/rules are loaded,
    and whether the configured files actually exist on disk.
    """
    # Dead code removed: a RiskScorer was previously constructed here but
    # never used.
    engine = _get_engine()

    policies_count = len(engine.policies)
    rules_count = sum(len(p.rules) for p in engine.policies)
    policies_dir = DEFAULT_POLICIES_DIR
    controls_file = DEFAULT_CONTROLS_MAPPING

    print("🛡️ Darkplex Governance Status")
    print(f" Policies dir: {policies_dir}")
    print(f" Controls map: {controls_file}")
    print(f" Policies loaded: {policies_count}")
    print(f" Total rules: {rules_count}")
    print(f" Policies dir exists: {'✅' if Path(policies_dir).exists() else '❌'}")
    print(f" Controls file exists: {'✅' if Path(controls_file).exists() else '❌'}")
|
||||
|
||||
|
||||
def cmd_report(args: argparse.Namespace) -> None:
    """Generate compliance report (placeholder — needs live evidence).

    Generates a per-agent report when --agent is given, otherwise the full
    compliance report; writes JSON to --output or prints it to stdout.
    """
    # Fully-qualified package imports — bare "governance" only resolves when
    # cortex/ itself is on sys.path.
    from cortex.governance.evidence import ControlMapping, EvidenceCollector
    from cortex.governance.report_generator import ReportGenerator

    collector = EvidenceCollector(
        control_mapping=ControlMapping(DEFAULT_CONTROLS_MAPPING)
    )
    generator = ReportGenerator(collector)

    if args.agent:
        report = generator.generate_agent_report(args.agent)
    else:
        report = generator.generate_compliance_report()

    output = json.dumps(report, indent=2)
    if args.output:
        Path(args.output).write_text(output)
        print(f"✅ Report written to {args.output}")
    else:
        print(output)
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: parse args and dispatch to the governance subcommand.

    No subcommand (or an unknown one) prints help and exits with status 1.
    """
    parser = argparse.ArgumentParser(prog="darkplex governance", description="Governance Engine")
    # --json is declared both here and on subparsers (same dest), so it can
    # appear before or after the subcommand.
    parser.add_argument("--json", action="store_true", help="JSON output")
    sub = parser.add_subparsers(dest="subcmd")

    # evaluate
    p_eval = sub.add_parser("evaluate", aliases=["eval"], help="Full policy + risk evaluation")
    p_eval.add_argument("--agent", required=True)
    p_eval.add_argument("--action", required=True)
    p_eval.add_argument("--data-type", default="public", choices=["public", "internal", "confidential", "restricted"])
    p_eval.add_argument("--target", default="internal", choices=["internal", "external"])
    p_eval.add_argument("--role", default="assistant", choices=["admin", "operator", "assistant", "external"])
    p_eval.add_argument("--json", action="store_true", dest="json")

    # risk
    p_risk = sub.add_parser("risk", help="Risk scoring only")
    p_risk.add_argument("--agent", default="unknown")
    p_risk.add_argument("--action", default="unknown")
    p_risk.add_argument("--data-type", default="public", choices=["public", "internal", "confidential", "restricted"])
    p_risk.add_argument("--target", default="internal", choices=["internal", "external"])
    p_risk.add_argument("--role", default="assistant", choices=["admin", "operator", "assistant", "external"])
    p_risk.add_argument("--json", action="store_true", dest="json")

    # policies
    p_pol = sub.add_parser("policies", help="List loaded policies")
    p_pol.add_argument("--reload", action="store_true")

    # status
    sub.add_parser("status", help="Show governance status")

    # report
    p_rep = sub.add_parser("report", help="Generate compliance report")
    p_rep.add_argument("--agent", default=None)
    p_rep.add_argument("--output", "-o", default=None)
    p_rep.add_argument("--json", action="store_true", dest="json")

    args = parser.parse_args()

    # Dispatch. argparse stores the literal spelling used, so the alias
    # "eval" must be matched alongside "evaluate".
    if args.subcmd in ("evaluate", "eval"):
        cmd_evaluate(args)
    elif args.subcmd == "risk":
        cmd_risk(args)
    elif args.subcmd == "policies":
        cmd_policies(args)
    elif args.subcmd == "status":
        cmd_status(args)
    elif args.subcmd == "report":
        cmd_report(args)
    else:
        parser.print_help()
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
70
cortex/governance/controls/iso27001-mapping.yaml
Normal file
70
cortex/governance/controls/iso27001-mapping.yaml
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# ISO 27001 Annex A Controls → NATS Event Streams Mapping
|
||||
#
|
||||
# Maps governance event types to ISO 27001:2022 Annex A controls.
|
||||
# Used by the Evidence Collector to tag evidence with applicable controls.
|
||||
|
||||
version: "1.0.0"
|
||||
|
||||
mappings:
|
||||
# A.5 — Organizational Controls (Information Security Policies)
|
||||
- controls:
|
||||
- "A.5.1" # Policies for information security
|
||||
- "A.5.2" # Information security roles and responsibilities
|
||||
- "A.5.4" # Management responsibilities
|
||||
event_types:
|
||||
- policy_evaluation
|
||||
- policy_update
|
||||
- policy_violation
|
||||
nats_subjects:
|
||||
- "governance.policy.>"
|
||||
|
||||
# A.5.10-12 — Acceptable use, return, classification
|
||||
- controls:
|
||||
- "A.5.10" # Acceptable use of information
|
||||
- "A.5.12" # Classification of information
|
||||
- "A.5.13" # Labelling of information
|
||||
event_types:
|
||||
- data_access
|
||||
- data_classification
|
||||
- data_export
|
||||
nats_subjects:
|
||||
- "governance.data.>"
|
||||
|
||||
# A.8 — Technology Controls (Asset Management)
|
||||
- controls:
|
||||
- "A.8.1" # User endpoint devices
|
||||
- "A.8.2" # Privileged access rights
|
||||
- "A.8.5" # Secure authentication
|
||||
- "A.8.9" # Configuration management
|
||||
- "A.8.16" # Monitoring activities
|
||||
event_types:
|
||||
- agent_authentication
|
||||
- agent_action
|
||||
- system_configuration
|
||||
- monitoring_alert
|
||||
nats_subjects:
|
||||
- "governance.agent.>"
|
||||
- "governance.system.>"
|
||||
|
||||
# A.9 — Access Control
|
||||
- controls:
|
||||
- "A.5.15" # Access control
|
||||
- "A.5.16" # Identity management
|
||||
- "A.5.17" # Authentication information
|
||||
- "A.5.18" # Access rights
|
||||
event_types:
|
||||
- access_request
|
||||
- access_granted
|
||||
- access_denied
|
||||
- role_change
|
||||
nats_subjects:
|
||||
- "governance.access.>"
|
||||
|
||||
# A.5.23-25 — Supplier/Cloud
|
||||
- controls:
|
||||
- "A.5.23" # Information security for cloud services
|
||||
event_types:
|
||||
- external_api_call
|
||||
- cloud_service_access
|
||||
nats_subjects:
|
||||
- "governance.external.>"
|
||||
129
cortex/governance/enforcer.py
Normal file
129
cortex/governance/enforcer.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""Runtime Enforcer: pre-execution policy check (approve/deny/escalate).
|
||||
|
||||
The enforcer is the single entry point for all agent action governance.
|
||||
It orchestrates the policy engine, risk scorer, and evidence collector.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from governance.evidence import EvidenceCollector
|
||||
from governance.policy import PolicyEngine
|
||||
from governance.risk_scorer import RiskResult, RiskScorer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class Decision:
    """The final governance decision for an agent action."""

    verdict: str  # "approve", "deny", "escalate"
    reason: str  # human-readable explanation for the verdict
    risk: RiskResult  # score/level/factors from the RiskScorer
    policy_result: dict[str, Any]  # raw result dict from PolicyEngine.evaluate()

    @property
    def approved(self) -> bool:
        """True iff the action may proceed without escalation."""
        return self.verdict == "approve"
|
||||
|
||||
|
||||
class Enforcer:
    """Pre-execution governance enforcer.

    Evaluates every agent action against policies and risk scoring,
    records evidence, and returns a decision.

    Usage:
        enforcer = Enforcer(policy_engine, risk_scorer, evidence_collector)
        decision = enforcer.evaluate({"agent": "claudia", "action": "send_email", ...})
        if decision.approved:
            execute_action()
    """

    # Risk levels that override policy to deny/escalate
    RISK_OVERRIDES: dict[str, str] = {
        "critical": "deny",
        "high": "escalate",
    }

    def __init__(
        self,
        policy_engine: PolicyEngine | None = None,
        risk_scorer: RiskScorer | None = None,
        evidence_collector: EvidenceCollector | None = None,
    ) -> None:
        self.policy_engine = policy_engine or PolicyEngine()
        self.risk_scorer = risk_scorer or RiskScorer()
        self.evidence_collector = evidence_collector or EvidenceCollector()

    def evaluate(self, context: dict[str, Any]) -> Decision:
        """Evaluate an agent action and return a governance decision.

        Args:
            context: Action context dict with keys like:
                - agent: agent identifier
                - action: action name
                - data_type / data_classification: data sensitivity
                - target: internal/external
                - agent_role: role of the requesting agent
                - hour: time of day (optional)

        Returns:
            Decision with verdict, reason, risk score, and policy result.

        Note:
            Mutates *context* in place when normalizing the
            data_classification alias into data_type.
        """
        # Normalize data_type: accept "data_classification" as an alias
        if "data_classification" in context and "data_type" not in context:
            context["data_type"] = context["data_classification"]

        # Step 1: Risk scoring
        risk = self.risk_scorer.score(context)

        # Step 2: Policy evaluation
        policy_result = self.policy_engine.evaluate(context)
        policy_verdict = policy_result["verdict"]

        # Step 3: Combine — risk can override policy to be MORE restrictive
        verdict = policy_verdict
        reason = policy_result["reason"]

        risk_override = self.RISK_OVERRIDES.get(risk.level)
        if risk_override:
            # Lower value = more restrictive. FIX: the permissive verdict is
            # "approve" (not "allow") — the old "allow" key was dead, and
            # "approve" only ranked as permissive via the .get() default.
            strictness = {"deny": 0, "escalate": 1, "approve": 2}
            if strictness.get(risk_override, 2) < strictness.get(verdict, 2):
                verdict = risk_override
                reason = f"Risk override ({risk.level}): {reason}"

        # Step 4: Record evidence (audit trail for every evaluation)
        agent = context.get("agent", "unknown")
        action = context.get("action", "unknown")
        self.evidence_collector.record(
            event_type="policy_evaluation",
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk.value,
            risk_level=risk.level,
            details={
                "context": context,
                "policy_result": policy_result,
                "risk_factors": risk.factors,
            },
        )

        decision = Decision(
            verdict=verdict,
            reason=reason,
            risk=risk,
            policy_result=policy_result,
        )

        logger.info(
            "Enforcer decision: %s → %s (risk: %d/%s)",
            f"{agent}/{action}", verdict, risk.value, risk.level,
        )

        return decision
|
||||
153
cortex/governance/evidence.py
Normal file
153
cortex/governance/evidence.py
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
"""Evidence Collector: NATS JetStream events → ISO 27001 control mapping.
|
||||
|
||||
Collects governance events from NATS, maps them to ISO 27001 Annex A controls,
|
||||
and stores evidence for audit reporting.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class EvidenceRecord:
    """A single piece of compliance evidence."""

    timestamp: str
    event_type: str
    agent: str
    action: str
    verdict: str
    risk_score: int
    risk_level: str
    controls: list[str]  # ISO 27001 control IDs
    details: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this record as a plain dict (declaration order preserved)."""
        field_names = (
            "timestamp", "event_type", "agent", "action", "verdict",
            "risk_score", "risk_level", "controls", "details",
        )
        return {name: getattr(self, name) for name in field_names}
|
||||
|
||||
|
||||
class ControlMapping:
    """Maps governance event types to ISO 27001 Annex A control IDs.

    The mapping is loaded from a YAML file of the shape:

        mappings:
          - event_types: [policy_evaluation]
            controls: [A.5.1, A.8.2]

    A missing or empty file degrades gracefully to an empty mapping.
    """

    def __init__(self, mapping_path: str | None = None) -> None:
        """Load the mapping from *mapping_path*, else from the
        GOVERNANCE_CONTROLS_MAPPING env var, else the default path."""
        self.mapping: dict[str, list[str]] = {}
        path = mapping_path or os.environ.get(
            "GOVERNANCE_CONTROLS_MAPPING", "controls/iso27001-mapping.yaml"
        )
        self._load_mapping(path)

    def _load_mapping(self, path: str) -> None:
        """Load the control mapping from YAML.

        Fix: ``yaml.safe_load`` returns ``None`` for an empty file, which
        previously raised AttributeError on ``data.get``; guard with ``or {}``.
        Also reads with an explicit UTF-8 encoding.
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            for mapping in data.get("mappings", []):
                for event_type in mapping.get("event_types", []):
                    self.mapping.setdefault(event_type, []).extend(mapping.get("controls", []))
            logger.info("Loaded %d event type mappings", len(self.mapping))
        except FileNotFoundError:
            # A missing mapping file is tolerated: mapping stays empty.
            logger.warning("Control mapping not found: %s", path)
        except Exception:
            logger.exception("Failed to load control mapping: %s", path)

    def get_controls(self, event_type: str) -> list[str]:
        """Return ISO 27001 controls applicable to *event_type* ([] if unmapped)."""
        return self.mapping.get(event_type, [])
|
||||
|
||||
|
||||
class EvidenceCollector:
    """Collects and stores governance evidence from agent actions.

    In production, this subscribes to NATS JetStream. For testing,
    evidence can be recorded directly via record().

    Usage:
        collector = EvidenceCollector()
        collector.record(event_type="policy_evaluation", agent="claudia", ...)
    """

    def __init__(self, control_mapping: ControlMapping | None = None) -> None:
        # Default to the env-configured mapping when none is injected.
        self.control_mapping = control_mapping or ControlMapping()
        self.evidence: list[EvidenceRecord] = []

    def record(
        self,
        event_type: str,
        agent: str,
        action: str,
        verdict: str,
        risk_score: int = 0,
        risk_level: str = "low",
        details: dict[str, Any] | None = None,
    ) -> EvidenceRecord:
        """Record a governance evidence entry and return it.

        Args:
            event_type: Type of governance event (e.g., policy_evaluation, access_request)
            agent: Agent identifier
            action: Action being performed
            verdict: Policy verdict (allow/deny/escalate)
            risk_score: Numeric risk score (0-10)
            risk_level: Risk level string
            details: Additional context
        """
        entry = EvidenceRecord(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event_type=event_type,
            agent=agent,
            action=action,
            verdict=verdict,
            risk_score=risk_score,
            risk_level=risk_level,
            # Controls applicable to this event type per ISO 27001 mapping.
            controls=self.control_mapping.get_controls(event_type),
            details=details or {},
        )
        self.evidence.append(entry)
        logger.info(
            "Evidence recorded: %s by %s → %s (risk: %d/%s, controls: %s)",
            action, agent, verdict, risk_score, risk_level, entry.controls,
        )
        return entry

    def get_evidence(
        self,
        agent: str | None = None,
        control: str | None = None,
        verdict: str | None = None,
    ) -> list[EvidenceRecord]:
        """Query evidence, optionally filtered by agent, control ID, and/or verdict."""
        return [
            e for e in self.evidence
            if (not agent or e.agent == agent)
            and (not control or control in e.controls)
            and (not verdict or e.verdict == verdict)
        ]

    def export_json(self) -> str:
        """Export all recorded evidence as pretty-printed JSON."""
        return json.dumps([e.to_dict() for e in self.evidence], indent=2)
|
||||
46
cortex/governance/policies/data-access.yaml
Normal file
46
cortex/governance/policies/data-access.yaml
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
# Data Access Policy
|
||||
# Defines who can access which data classifications.
|
||||
|
||||
name: data-access
|
||||
description: Controls agent access to data based on classification and role
|
||||
version: "1.0.0"
|
||||
|
||||
rules:
|
||||
- name: deny-external-restricted
|
||||
description: External agents cannot access restricted data
|
||||
conditions:
|
||||
agent_role: external
|
||||
data_type: restricted
|
||||
effect: deny
|
||||
priority: 100
|
||||
|
||||
- name: escalate-confidential-external
|
||||
description: Confidential data going external requires escalation
|
||||
conditions:
|
||||
data_type: confidential
|
||||
target: external
|
||||
effect: escalate
|
||||
priority: 90
|
||||
|
||||
- name: deny-restricted-external
|
||||
description: Restricted data must never leave internal systems
|
||||
conditions:
|
||||
data_type: restricted
|
||||
target: external
|
||||
effect: deny
|
||||
priority: 100
|
||||
|
||||
- name: allow-public-any
|
||||
description: Public data can be accessed by anyone
|
||||
conditions:
|
||||
data_type: public
|
||||
effect: allow
|
||||
priority: 10
|
||||
|
||||
- name: allow-internal-internal
|
||||
description: Internal data accessible within internal systems
|
||||
conditions:
|
||||
data_type: internal
|
||||
target: internal
|
||||
effect: allow
|
||||
priority: 50
|
||||
40
cortex/governance/policies/external-comms.yaml
Normal file
40
cortex/governance/policies/external-comms.yaml
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# External Communications Policy
|
||||
# Controls when and how agents can communicate externally.
|
||||
|
||||
name: external-comms
|
||||
description: Governs agent communication with external systems and parties
|
||||
version: "1.0.0"
|
||||
|
||||
rules:
|
||||
- name: deny-assistant-external-email
|
||||
description: Assistants cannot send external emails without escalation
|
||||
conditions:
|
||||
agent_role: assistant
|
||||
action: send_email
|
||||
target: external
|
||||
effect: escalate
|
||||
priority: 80
|
||||
|
||||
- name: allow-operator-external
|
||||
description: Operators may communicate externally
|
||||
conditions:
|
||||
agent_role: operator
|
||||
target: external
|
||||
effect: allow
|
||||
priority: 70
|
||||
|
||||
- name: deny-external-api-restricted
|
||||
description: No external API calls with restricted data
|
||||
conditions:
|
||||
action: api_call
|
||||
target: external
|
||||
data_type: restricted
|
||||
effect: deny
|
||||
priority: 100
|
||||
|
||||
- name: allow-internal-comms
|
||||
description: Internal communication is always allowed
|
||||
conditions:
|
||||
target: internal
|
||||
effect: allow
|
||||
priority: 10
|
||||
42
cortex/governance/policies/financial-data.yaml
Normal file
42
cortex/governance/policies/financial-data.yaml
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# Financial Data Policy (BaFin-relevant)
|
||||
# Strict controls for financial data handling.
|
||||
|
||||
name: financial-data
|
||||
description: BaFin-compliant financial data governance
|
||||
version: "1.0.0"
|
||||
|
||||
rules:
|
||||
- name: deny-financial-external
|
||||
description: Financial data must not leave internal systems
|
||||
conditions:
|
||||
data_type: restricted
|
||||
action: export_data
|
||||
target: external
|
||||
effect: deny
|
||||
priority: 100
|
||||
|
||||
- name: escalate-financial-access
|
||||
description: All access to financial data requires escalation
|
||||
conditions:
|
||||
data_type: restricted
|
||||
action: read_financial
|
||||
effect: escalate
|
||||
priority: 95
|
||||
|
||||
- name: deny-financial-offhours
|
||||
description: Financial data modifications require escalation outside business hours (note - no hour_range condition is set, so this rule currently escalates at all hours)
|
||||
conditions:
|
||||
data_type: restricted
|
||||
action: modify_financial
|
||||
effect: escalate
|
||||
priority: 90
|
||||
|
||||
- name: allow-financial-reporting
|
||||
description: Internal financial reporting is permitted for operators
|
||||
conditions:
|
||||
agent_role: operator
|
||||
action: generate_report
|
||||
data_type: confidential
|
||||
target: internal
|
||||
effect: allow
|
||||
priority: 80
|
||||
43
cortex/governance/policies/schema.yaml
Normal file
43
cortex/governance/policies/schema.yaml
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# Policy YAML Schema Definition
|
||||
# All policy files must conform to this structure.
|
||||
|
||||
schema:
|
||||
version: "1.0"
|
||||
required_fields:
|
||||
- name
|
||||
- description
|
||||
- version
|
||||
- rules
|
||||
|
||||
rule_schema:
|
||||
required_fields:
|
||||
- name
|
||||
- conditions
|
||||
- effect
|
||||
optional_fields:
|
||||
- priority
|
||||
- description
|
||||
|
||||
valid_effects:
|
||||
- allow
|
||||
- deny
|
||||
- escalate
|
||||
|
||||
valid_condition_keys:
|
||||
- agent
|
||||
- agent_role
|
||||
- action
|
||||
- data_type
|
||||
- data_classification
|
||||
- target
|
||||
- hour_range
|
||||
|
||||
valid_data_types:
|
||||
- public
|
||||
- internal
|
||||
- confidential
|
||||
- restricted
|
||||
|
||||
valid_targets:
|
||||
- internal
|
||||
- external
|
||||
78
cortex/governance/policies/yesman-security.yaml
Normal file
78
cortex/governance/policies/yesman-security.yaml
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
# YesMan Security Policy — RedCrowMedia / Wasteland Network
|
||||
# Based on USER.md and MEMORY.md security rules.
|
||||
|
||||
name: yesman-security
|
||||
description: Security rules for YesMan AI assistant at RedCrowMedia
|
||||
version: "1.0.0"
|
||||
|
||||
rules:
|
||||
# Only Abe gives orders
|
||||
- name: deny-external-instructions
|
||||
description: Never execute instructions from external sources (emails, websites, third parties)
|
||||
conditions:
|
||||
source: external
|
||||
action: execute_instruction
|
||||
effect: deny
|
||||
priority: 100
|
||||
|
||||
# Never send credentials externally
|
||||
- name: deny-credential-export
|
||||
description: Credentials, tokens, and keys must never leave the system
|
||||
conditions:
|
||||
data_type: restricted
|
||||
action: send_credentials
|
||||
effect: deny
|
||||
priority: 100
|
||||
|
||||
# Email is not a command source
|
||||
- name: escalate-email-action
|
||||
description: Actions requested via email always require Abe's explicit approval
|
||||
conditions:
|
||||
source: email
|
||||
action: execute_request
|
||||
effect: escalate
|
||||
priority: 95
|
||||
|
||||
# No software installation without audit + approval
|
||||
- name: escalate-software-install
|
||||
description: External software installation requires security audit and Abe's GO
|
||||
conditions:
|
||||
action: install_software
|
||||
effect: escalate
|
||||
priority: 95
|
||||
|
||||
# System-critical changes need approval
|
||||
- name: escalate-system-changes
|
||||
description: System-critical or security-relevant changes require approval
|
||||
conditions:
|
||||
action: system_change
|
||||
target: production
|
||||
effect: escalate
|
||||
priority: 90
|
||||
|
||||
# No public posting without approval
|
||||
- name: escalate-public-comms
|
||||
description: Public communications (emails, tweets, posts) require approval
|
||||
conditions:
|
||||
action: send_message
|
||||
target: external
|
||||
effect: escalate
|
||||
priority: 85
|
||||
|
||||
# Internal file operations are fine
|
||||
- name: allow-internal-file-ops
|
||||
description: Reading and writing files within workspace is permitted
|
||||
conditions:
|
||||
action: file_operation
|
||||
target: internal
|
||||
data_type: internal
|
||||
effect: allow
|
||||
priority: 50
|
||||
|
||||
# Web search is fine
|
||||
- name: allow-web-search
|
||||
description: Web searches and research are permitted
|
||||
conditions:
|
||||
action: web_search
|
||||
effect: allow
|
||||
priority: 40
|
||||
143
cortex/governance/policy.py
Normal file
143
cortex/governance/policy.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
"""Policy Engine: loads YAML policies and evaluates agent actions against them.
|
||||
|
||||
Policies are human-readable YAML files, versioned in Git. Each policy defines
|
||||
rules with conditions and effects (allow/deny/escalate).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class Rule:
    """A single policy rule: a set of conditions plus an effect."""

    name: str
    conditions: dict[str, Any]
    effect: str  # "allow", "deny", "escalate"
    priority: int = 0

    def matches(self, context: dict[str, Any]) -> bool:
        """Return True when every condition is satisfied by *context*.

        A list-valued condition means "actual value must be one of these".
        A key that is absent from the context (or explicitly None) never
        satisfies a condition.
        """
        def satisfied(expected: Any, actual: Any) -> bool:
            if actual is None:
                return False
            if isinstance(expected, list):
                return actual in expected
            return actual == expected

        return all(
            satisfied(expected, context.get(key))
            for key, expected in self.conditions.items()
        )
|
||||
|
||||
|
||||
@dataclass
class Policy:
    """A named governance policy containing ordered rules."""

    name: str         # unique policy name (from YAML `name`, else file stem)
    description: str  # human-readable purpose of the policy
    version: str      # version string from the policy file, e.g. "1.0.0"
    rules: list[Rule] = field(default_factory=list)  # rules evaluated by PolicyEngine
|
||||
|
||||
|
||||
class PolicyEngine:
    """Loads and evaluates YAML-based governance policies.

    Policies are read from ``policies_dir`` (or the GOVERNANCE_POLICIES_DIR
    env var). ``schema.yaml`` is skipped — it describes the format, it is
    not a policy.

    Usage:
        engine = PolicyEngine(policies_dir="policies/")
        result = engine.evaluate(action_context)
    """

    def __init__(self, policies_dir: str | None = None) -> None:
        self.policies_dir = Path(
            policies_dir or os.environ.get("GOVERNANCE_POLICIES_DIR", "policies/")
        )
        self.policies: list[Policy] = []
        self._load_policies()

    def _load_policies(self) -> None:
        """Load all YAML policy files from the policies directory.

        A missing directory or an unparsable file is logged and skipped,
        never fatal — the engine then simply has fewer (or no) policies.
        """
        if not self.policies_dir.exists():
            logger.warning("Policies directory not found: %s", self.policies_dir)
            return

        for path in sorted(self.policies_dir.glob("*.yaml")):
            if path.name == "schema.yaml":
                continue
            try:
                policy = self._parse_policy(path)
                self.policies.append(policy)
                logger.info("Loaded policy: %s (%d rules)", policy.name, len(policy.rules))
            except Exception:
                logger.exception("Failed to load policy: %s", path)

    def _parse_policy(self, path: Path) -> Policy:
        """Parse a YAML file into a Policy object.

        Fix: ``yaml.safe_load`` returns ``None`` for an empty file, which
        previously raised AttributeError on ``data.get``; guard with ``or {}``.
        Also reads with an explicit UTF-8 encoding.
        """
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}

        rules = [
            Rule(
                name=rule_data["name"],
                conditions=rule_data.get("conditions", {}),
                effect=rule_data.get("effect", "deny"),
                priority=rule_data.get("priority", 0),
            )
            for rule_data in data.get("rules", [])
        ]

        return Policy(
            name=data.get("name", path.stem),
            description=data.get("description", ""),
            version=data.get("version", "1.0.0"),
            rules=rules,
        )

    def evaluate(self, context: dict[str, Any]) -> dict[str, Any]:
        """Evaluate an action context against all loaded policies.

        Returns a dict with ``verdict``, ``reason``, and ``matched_rules``.
        The verdict comes from the highest-priority matching rule; among
        equal priorities the strictest effect wins (deny > escalate > allow).
        With no matching rules at all, the verdict defaults to 'allow'.
        """
        matches: list[tuple[Rule, Policy]] = []

        for policy in self.policies:
            for rule in policy.rules:
                if rule.matches(context):
                    matches.append((rule, policy))

        if not matches:
            return {
                "verdict": "allow",
                "reason": "No matching policy rules",
                "matched_rules": [],
            }

        # Sort by priority (highest first), then by strictness (deny > escalate > allow)
        effect_order = {"deny": 0, "escalate": 1, "allow": 2}
        matches.sort(key=lambda m: (-m[0].priority, effect_order.get(m[0].effect, 2)))

        top_rule, top_policy = matches[0]
        return {
            "verdict": top_rule.effect,
            "reason": f"Policy '{top_policy.name}', rule '{top_rule.name}'",
            "matched_rules": [
                {"policy": p.name, "rule": r.name, "effect": r.effect}
                for r, p in matches
            ],
        }

    def reload(self) -> None:
        """Reload all policies from disk, discarding the in-memory set first."""
        self.policies.clear()
        self._load_policies()
|
||||
109
cortex/governance/report_generator.py
Normal file
109
cortex/governance/report_generator.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
"""Audit Report Generator: creates compliance reports from collected evidence.
|
||||
|
||||
Generates structured reports grouped by ISO 27001 controls, time periods,
|
||||
and agent activity.
|
||||
"""
|
||||
|
||||
from __future__ import annotations

import json
import logging
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any

from cortex.governance.evidence import EvidenceCollector, EvidenceRecord
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ReportSection:
    """A section of an audit report."""

    # NOTE(review): not referenced by ReportGenerator in this file, which
    # builds plain dicts instead — confirm callers before removing.
    title: str  # section heading
    entries: list[dict[str, Any]]  # individual evidence entries in this section
    summary: dict[str, Any]  # aggregate statistics for the section
|
||||
|
||||
|
||||
class ReportGenerator:
    """Generates audit reports from governance evidence.

    Usage:
        generator = ReportGenerator(evidence_collector)
        report = generator.generate_compliance_report()
    """

    def __init__(self, evidence_collector: EvidenceCollector) -> None:
        self.collector = evidence_collector

    def generate_compliance_report(self) -> dict[str, Any]:
        """Build the full compliance report, grouped by ISO 27001 control."""
        evidence = self.collector.evidence
        if not evidence:
            return {"generated_at": _now_iso(), "status": "no_evidence", "sections": []}

        # Group every record under each control it maps to.
        grouped: dict[str, list[EvidenceRecord]] = defaultdict(list)
        for rec in evidence:
            for ctl in rec.controls:
                grouped[ctl].append(rec)

        sections = [
            {
                "control": ctl,
                "total_events": len(recs),
                "verdicts": _count_verdicts(recs),
                "risk_distribution": _count_risk_levels(recs),
                "agents": list({r.agent for r in recs}),
            }
            for ctl, recs in sorted(grouped.items())
        ]

        return {
            "generated_at": _now_iso(),
            "total_evidence": len(evidence),
            "controls_covered": sorted(grouped),
            "summary": {
                "total_deny": sum(e.verdict == "deny" for e in evidence),
                "total_escalate": sum(e.verdict == "escalate" for e in evidence),
                "total_allow": sum(e.verdict == "allow" for e in evidence),
                "high_risk_events": sum(e.risk_score >= 7 for e in evidence),
            },
            "sections": sections,
        }

    def generate_agent_report(self, agent: str) -> dict[str, Any]:
        """Build an activity report for a single agent."""
        records = self.collector.get_evidence(agent=agent)
        return {
            "generated_at": _now_iso(),
            "agent": agent,
            "total_actions": len(records),
            "verdicts": _count_verdicts(records),
            "risk_distribution": _count_risk_levels(records),
            "actions": [r.to_dict() for r in records],
        }

    def export_json(self) -> str:
        """Serialize the compliance report as formatted JSON."""
        return json.dumps(self.generate_compliance_report(), indent=2)
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _count_verdicts(records: list[EvidenceRecord]) -> dict[str, int]:
|
||||
counts: dict[str, int] = defaultdict(int)
|
||||
for r in records:
|
||||
counts[r.verdict] += 1
|
||||
return dict(counts)
|
||||
|
||||
|
||||
def _count_risk_levels(records: list[EvidenceRecord]) -> dict[str, int]:
|
||||
counts: dict[str, int] = defaultdict(int)
|
||||
for r in records:
|
||||
counts[r.risk_level] += 1
|
||||
return dict(counts)
|
||||
126
cortex/governance/risk_scorer.py
Normal file
126
cortex/governance/risk_scorer.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""Risk Scorer: context-based risk scoring for agent actions.
|
||||
|
||||
Risk levels:
|
||||
- low (0-3): routine operations
|
||||
- elevated (4-6): notable but acceptable
|
||||
- high (7-8): requires escalation
|
||||
- critical (9-10): auto-deny + alert
|
||||
|
||||
Factors: data classification, target (internal/external), agent role, time of day.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Data classification weights — more sensitive data contributes more risk.
DATA_WEIGHTS: dict[str, int] = {
    "public": 0,
    "internal": 2,
    "confidential": 5,
    "restricted": 8,
}

# Target weights — leaving the internal perimeter adds risk.
TARGET_WEIGHTS: dict[str, int] = {
    "internal": 0,
    "external": 3,
}

# Agent role weights (lower = more trusted); admin's -1 can offset one
# point of risk from other factors before the final 0-10 clamp.
ROLE_WEIGHTS: dict[str, int] = {
    "admin": -1,
    "operator": 0,
    "assistant": 1,
    "external": 3,
}

# Off-hours bonus added when the action happens outside 08:00-18:00 UTC.
OFF_HOURS_BONUS = 2
|
||||
|
||||
|
||||
@dataclass
class RiskResult:
    """Result of a risk assessment."""

    value: int  # total risk score, clamped to 0-10
    level: str  # "low" | "elevated" | "high" | "critical"
    factors: dict[str, Any]  # per-factor breakdown used to compute `value`

    @property
    def is_acceptable(self) -> bool:
        # Scores up through "elevated" (<= 6) are acceptable without escalation.
        return self.value <= 6
|
||||
|
||||
|
||||
def _classify_level(score: int) -> str:
|
||||
"""Map a numeric score to a risk level."""
|
||||
if score <= 3:
|
||||
return "low"
|
||||
elif score <= 6:
|
||||
return "elevated"
|
||||
elif score <= 8:
|
||||
return "high"
|
||||
else:
|
||||
return "critical"
|
||||
|
||||
|
||||
class RiskScorer:
    """Calculates contextual risk scores for agent actions.

    Usage:
        scorer = RiskScorer()
        result = scorer.score({"data_type": "confidential", "target": "external"})
    """

    def score(self, context: dict[str, Any]) -> RiskResult:
        """Score an action context and return a RiskResult.

        Args:
            context: Dict with optional keys:
                - data_type: public|internal|confidential|restricted
                - target: internal|external
                - agent_role: admin|operator|assistant|external
                - hour: 0-23 (defaults to current hour UTC)
        """
        # Pull inputs first, applying the documented defaults.
        data_type = context.get("data_type", "public")
        target = context.get("target", "internal")
        role = context.get("agent_role", "assistant")
        hour = context.get("hour")
        if hour is None:
            hour = datetime.now(timezone.utc).hour

        # Per-factor scores; unknown roles get the assistant-level weight 1,
        # unknown data types / targets contribute 0.
        data_score = DATA_WEIGHTS.get(data_type, 0)
        target_score = TARGET_WEIGHTS.get(target, 0)
        role_score = ROLE_WEIGHTS.get(role, 1)
        is_off_hours = not (8 <= hour < 18)
        time_score = OFF_HOURS_BONUS if is_off_hours else 0

        factors: dict[str, Any] = {
            "data_type": {"value": data_type, "score": data_score},
            "target": {"value": target, "score": target_score},
            "agent_role": {"value": role, "score": role_score},
            "time_of_day": {"hour": hour, "off_hours": is_off_hours, "score": time_score},
        }

        # Clamp the sum into the documented 0-10 range.
        total = max(0, min(10, data_score + target_score + role_score + time_score))

        level = _classify_level(total)
        logger.debug("Risk score: %d (%s) — factors: %s", total, level, factors)

        return RiskResult(value=total, level=level, factors=factors)
|
||||
0
cortex/intelligence/__init__.py
Normal file
0
cortex/intelligence/__init__.py
Normal file
193
cortex/intelligence/anticipator.py
Normal file
193
cortex/intelligence/anticipator.py
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
"""Proactive Intelligence: pattern-based predictions and anticipation.
|
||||
|
||||
Detects patterns in historical events and generates proactive alerts:
|
||||
- SSL certificate expiry approaching
|
||||
- Recurring issues (same error pattern at predictable intervals)
|
||||
- Usage pattern anomalies
|
||||
- Resource exhaustion trends
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum
|
||||
from typing import Any, Callable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AlertSeverity(Enum):
    """Severity of a proactive prediction/alert."""

    INFO = "info"          # informational; no action required
    WARNING = "warning"    # should be investigated soon
    CRITICAL = "critical"  # needs immediate attention
|
||||
|
||||
|
||||
@dataclass
class Prediction:
    """A proactive prediction about a future event."""

    pattern_name: str  # name of the pattern that fired
    description: str   # human-readable summary of the prediction
    severity: AlertSeverity  # alert severity level
    predicted_time: datetime | None = None  # when the event is expected, if known
    confidence: float = 0.0  # 0.0-1.0
    recommended_action: str = ""  # suggested remediation step
    metadata: dict[str, Any] = field(default_factory=dict)  # detector-specific data
|
||||
|
||||
|
||||
@dataclass
class PatternDefinition:
    """Definition of a detectable pattern."""

    name: str         # unique pattern identifier
    description: str  # what the detector looks for
    # Callable taking the full event list; returns a Prediction or None.
    detector: Callable[[list[dict[str, Any]]], Prediction | None]
|
||||
|
||||
|
||||
class Anticipator:
    """Proactive intelligence engine that detects patterns and generates predictions.

    Usage:
        anticipator = Anticipator()
        anticipator.register_pattern(ssl_expiry_pattern)
        predictions = anticipator.analyze(events)
    """

    def __init__(self) -> None:
        self.patterns: list[PatternDefinition] = []
        self._register_builtin_patterns()

    def register_pattern(self, pattern: PatternDefinition) -> None:
        """Register a new pattern detector."""
        self.patterns.append(pattern)
        logger.info("Registered pattern: %s", pattern.name)

    def analyze(self, events: list[dict[str, Any]]) -> list[Prediction]:
        """Analyze events against all registered patterns.

        Args:
            events: List of event dicts with at minimum 'timestamp', 'type', 'data'.

        Returns:
            List of predictions, sorted by severity (critical first), then
            by descending confidence. A failing detector is logged and
            skipped; it never aborts the run.
        """
        results: list[Prediction] = []

        for pattern in self.patterns:
            try:
                hit = pattern.detector(events)
                if hit:
                    results.append(hit)
                    logger.info(
                        "Pattern detected: %s (severity: %s, confidence: %.2f)",
                        hit.pattern_name,
                        hit.severity.value,
                        hit.confidence,
                    )
            except Exception:
                logger.exception("Pattern detector failed: %s", pattern.name)

        # Sort: critical first, then by confidence
        rank = {AlertSeverity.CRITICAL: 0, AlertSeverity.WARNING: 1, AlertSeverity.INFO: 2}
        results.sort(key=lambda p: (rank.get(p.severity, 3), -p.confidence))

        return results

    def _register_builtin_patterns(self) -> None:
        """Register the built-in pattern detectors."""
        builtins = (
            ("ssl_cert_expiry", "Detects SSL certificates approaching expiry", _detect_ssl_expiry),
            ("recurring_error", "Detects recurring error patterns", _detect_recurring_errors),
            ("usage_spike", "Detects unusual usage spikes", _detect_usage_spike),
        )
        for name, description, detector in builtins:
            self.register_pattern(
                PatternDefinition(name=name, description=description, detector=detector)
            )
|
||||
|
||||
|
||||
def _detect_ssl_expiry(events: list[dict[str, Any]]) -> Prediction | None:
    """Detect SSL certificates that will expire within 14 days.

    Scans 'ssl_cert_check' events for a ``data.expiry`` ISO timestamp.
    Naive timestamps are assumed to be UTC. Returns a Prediction for the
    FIRST expiring certificate found (other expiring certs in the same
    batch are not reported), or None when nothing is within the threshold.
    Already-expired certs also match and report negative days_remaining.
    """
    now = datetime.now(timezone.utc)
    threshold = timedelta(days=14)

    for event in events:
        if event.get("type") != "ssl_cert_check":
            continue
        expiry_str = event.get("data", {}).get("expiry")
        if not expiry_str:
            continue
        try:
            expiry = datetime.fromisoformat(expiry_str)
            if expiry.tzinfo is None:
                # Treat naive timestamps as UTC so the subtraction below works.
                expiry = expiry.replace(tzinfo=timezone.utc)
        except (ValueError, TypeError):
            # Unparsable expiry value: skip this event rather than fail.
            continue

        remaining = expiry - now
        if remaining < threshold:
            domain = event.get("data", {}).get("domain", "unknown")
            # Under 3 days left (or already expired) is treated as critical.
            severity = AlertSeverity.CRITICAL if remaining.days < 3 else AlertSeverity.WARNING
            return Prediction(
                pattern_name="ssl_cert_expiry",
                description=f"SSL certificate for {domain} expires in {remaining.days} days",
                severity=severity,
                predicted_time=expiry,
                confidence=0.95,
                recommended_action=f"Renew SSL certificate for {domain}",
                metadata={"domain": domain, "days_remaining": remaining.days},
            )
    return None
|
||||
|
||||
|
||||
def _detect_recurring_errors(events: list[dict[str, Any]]) -> Prediction | None:
    """Detect recurring error patterns (same error type appearing 3+ times).

    Reports the first qualifying error type (in first-seen order); other
    recurring error types in the same batch are not reported.
    """
    tallies: dict[str, int] = {}
    for ev in events:
        if ev.get("type") != "error":
            continue
        kind = ev.get("data", {}).get("error_type", "unknown")
        tallies[kind] = tallies.get(kind, 0) + 1

    for kind, seen in tallies.items():
        if seen < 3:
            continue
        return Prediction(
            pattern_name="recurring_error",
            description=f"Recurring error '{kind}' detected ({seen} occurrences)",
            severity=AlertSeverity.WARNING,
            # Confidence grows with occurrences, capped at 0.95.
            confidence=min(0.5 + seen * 0.1, 0.95),
            recommended_action=f"Investigate root cause of '{kind}'",
            metadata={"error_type": kind, "count": seen},
        )
    return None
|
||||
|
||||
|
||||
def _detect_usage_spike(events: list[dict[str, Any]]) -> Prediction | None:
    """Detect unusual usage spikes (recent average >2x the overall average).

    Needs at least 10 'usage_metric' events to have a meaningful baseline;
    the "recent" window is the last 3 samples.
    """
    samples = [
        ev.get("data", {}).get("value", 0)
        for ev in events
        if ev.get("type") == "usage_metric"
    ]
    if len(samples) < 10:
        return None

    overall = sum(samples) / len(samples)
    window = samples[-3:] if len(samples) >= 3 else samples
    recent_avg = sum(window) / len(window) if window else 0

    if overall > 0 and recent_avg > overall * 2:
        return Prediction(
            pattern_name="usage_spike",
            description=f"Usage spike detected: recent avg {recent_avg:.1f} vs overall {overall:.1f}",
            severity=AlertSeverity.WARNING,
            confidence=0.7,
            recommended_action="Investigate usage spike — potential anomaly or load increase",
            metadata={"average": overall, "recent_average": recent_avg, "ratio": recent_avg / overall},
        )
    return None
|
||||
154
cortex/intelligence/collective.py
Normal file
154
cortex/intelligence/collective.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
"""Collective Learning: aggregates patterns across all internal agents.
|
||||
|
||||
Subscribes to the shared memory bus, collects insights from all
|
||||
Vainplex-internal agents, and builds an aggregated knowledge base
|
||||
for pattern detection and cross-agent learning.
|
||||
|
||||
🚨 STRICT DATA ISOLATION: Only Vainplex-internal agents participate.
|
||||
No customer data. No customer agent insights. Ever.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from cortex.intelligence.shared_memory import ALLOWED_AGENTS, Insight, SharedMemory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class AggregatedPattern:
    """A pattern detected across multiple agents.

    Built by CollectiveLearning when 2+ distinct agents have published
    insights on the same topic.
    """

    topic: str                  # shared-memory topic the insights arrived on
    description: str            # human-readable summary of the pattern
    contributing_agents: list[str]  # distinct agent names that observed it
    confidence: float           # mean confidence across contributing insights
    occurrence_count: int       # number of insights backing the pattern
    first_seen: str             # earliest insight timestamp (ISO string)
    last_seen: str              # latest insight timestamp (ISO string)
    insights: list[Insight] = field(default_factory=list)  # backing insights
|
||||
|
||||
|
||||
class CollectiveLearning:
    """Aggregates patterns from all internal agents into collective knowledge.

    Usage:
        collective = CollectiveLearning(shared_memory)
        await collective.start()
        patterns = collective.get_patterns()

    ⚠️ DATA ISOLATION: Only processes insights from ALLOWED_AGENTS.
    """

    def __init__(self, shared_memory: SharedMemory) -> None:
        self.shared_memory = shared_memory
        # Raw insights grouped by topic; patterns are derived from these.
        self._insights_by_topic: dict[str, list[Insight]] = defaultdict(list)
        self._patterns: list[AggregatedPattern] = []

    async def start(self) -> None:
        """Start listening for insights on all topics."""
        await self.shared_memory.subscribe(">", self._handle_insight)
        logger.info("Collective learning started — listening for insights")

    async def _handle_insight(self, insight: Insight) -> None:
        """Collect one incoming insight and refresh the pattern set."""
        # Defense in depth: enforce the internal-agents-only policy here too.
        if insight.agent not in ALLOWED_AGENTS:
            logger.warning("Rejected insight from non-internal agent: %s", insight.agent)
            return

        self._insights_by_topic[insight.topic].append(insight)
        logger.debug(
            "Collected insight: %s from %s (topic: %s)",
            insight.content[:60], insight.agent, insight.topic,
        )

        # New data may create or strengthen cross-agent patterns.
        self._detect_patterns()

    def _detect_patterns(self) -> None:
        """Rebuild the pattern list from everything collected so far."""
        detected: list[AggregatedPattern] = []

        for topic, insights in self._insights_by_topic.items():
            if len(insights) < 2:
                continue

            contributing = list({i.agent for i in insights})
            if len(contributing) < 2:
                # Single-agent observations aren't "collective" patterns.
                continue

            ordered_times = sorted(i.timestamp for i in insights)
            mean_confidence = sum(i.confidence for i in insights) / len(insights)

            detected.append(AggregatedPattern(
                topic=topic,
                description=f"Cross-agent pattern on '{topic}' observed by {', '.join(contributing)}",
                contributing_agents=contributing,
                confidence=mean_confidence,
                occurrence_count=len(insights),
                first_seen=ordered_times[0],
                last_seen=ordered_times[-1],
                insights=insights,
            ))

        self._patterns = detected

    def get_patterns(
        self,
        topic: str | None = None,
        min_confidence: float = 0.0,
    ) -> list[AggregatedPattern]:
        """Retrieve detected collective patterns.

        Args:
            topic: Filter by topic (optional).
            min_confidence: Minimum confidence threshold.
        """
        selected = self._patterns
        if topic:
            selected = [p for p in selected if p.topic == topic]
        if min_confidence > 0:
            selected = [p for p in selected if p.confidence >= min_confidence]
        return selected

    def get_topic_summary(self) -> dict[str, Any]:
        """Get a summary of all topics and their insight counts."""
        summary: dict[str, Any] = {}
        for topic, insights in self._insights_by_topic.items():
            summary[topic] = {
                "count": len(insights),
                "agents": list({i.agent for i in insights}),
                "latest": max(i.timestamp for i in insights) if insights else None,
            }
        return summary

    def export_knowledge(self) -> str:
        """Export collective knowledge as JSON."""
        snapshot = {
            "exported_at": datetime.now(timezone.utc).isoformat(),
            "allowed_agents": sorted(ALLOWED_AGENTS),
            "patterns": [
                {
                    "topic": p.topic,
                    "description": p.description,
                    "contributing_agents": p.contributing_agents,
                    "confidence": p.confidence,
                    "occurrence_count": p.occurrence_count,
                    "first_seen": p.first_seen,
                    "last_seen": p.last_seen,
                }
                for p in self._patterns
            ],
            "topics": self.get_topic_summary(),
        }
        return json.dumps(snapshot, indent=2)
|
||||
420
cortex/intelligence/knowledge_cleanup.py
Normal file
420
cortex/intelligence/knowledge_cleanup.py
Normal file
|
|
@ -0,0 +1,420 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Knowledge graph cleanup: classify unknowns, deduplicate entities, score relationships.
|
||||
|
||||
Usage:
|
||||
darkplex cleanup [--classify] [--dedupe] [--score] [--dry-run]
|
||||
|
||||
If no flags given, runs all three steps.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
log = logging.getLogger("knowledge_cleanup")
|
||||
|
||||
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
|
||||
ENTITIES_PATH = KNOWLEDGE_DIR / "entities.json"
|
||||
RELATIONSHIPS_PATH = KNOWLEDGE_DIR / "relationships.json"
|
||||
OLLAMA_URL = "http://localhost:11434"
|
||||
OLLAMA_MODEL = "qwen2.5:7b"
|
||||
|
||||
VALID_TYPES = {"person", "organization", "company", "project", "technology",
|
||||
"location", "event", "concept", "product"}
|
||||
|
||||
|
||||
def backup(path: Path) -> Path:
    """Copy *path* to a timestamped sibling file and return the backup path."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = path.with_suffix(f".backup_{stamp}.json")
    shutil.copy2(path, backup_path)
    log.info(f"Backed up {path.name} → {backup_path.name}")
    return backup_path
|
||||
|
||||
|
||||
def atomic_write(path: Path, data):
    """Serialize *data* as JSON to a temp file, then atomically swap it in."""
    staging = path.with_suffix(".tmp")
    with open(staging, "w") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2)
    # rename() on the same filesystem is atomic — readers never see a partial file
    staging.replace(path)
    log.info(f"Wrote {path.name}")
|
||||
|
||||
|
||||
def load_entities() -> dict:
    """Load the entity store from disk."""
    return json.loads(ENTITIES_PATH.read_text())
|
||||
|
||||
|
||||
def load_relationships() -> dict:
    """Load the relationship store from disk."""
    return json.loads(RELATIONSHIPS_PATH.read_text())
|
||||
|
||||
|
||||
def ollama_generate(prompt: str, timeout: int = 120) -> str:
    """Call Ollama generate API."""
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        # Low temperature for deterministic classification output.
        "options": {"temperature": 0.1, "num_predict": 2000},
    }
    resp = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json().get("response", "")
|
||||
|
||||
|
||||
# ─── Task 1: Classify Unknowns ───────────────────────────────────────────────
|
||||
|
||||
def classify_unknowns(entities: dict, dry_run: bool = False) -> dict:
    """Classify entities with type='unknown' using LLM.

    Sends unknown entity names to Ollama in batches of 50, parses the JSON
    mapping it returns, and rewrites each entity's ``type`` (plus provenance
    fields) in place.

    Args:
        entities: Mapping of entity name -> entity record dict.
        dry_run: When True, still queries the LLM but does not mutate entities.

    Returns:
        The same ``entities`` mapping (mutated in place unless dry_run).
    """
    unknowns = {k: v for k, v in entities.items()
                if isinstance(v, dict) and v.get("type") == "unknown"}

    if not unknowns:
        log.info("No unknown entities to classify.")
        return entities

    log.info(f"Classifying {len(unknowns)} unknown entities...")

    names = list(unknowns.keys())
    batch_size = 50
    results = {}  # entity name -> validated category string

    for i in range(0, len(names), batch_size):
        batch = names[i:i + batch_size]
        batch_num = i // batch_size + 1
        total_batches = (len(names) + batch_size - 1) // batch_size  # ceil division
        log.info(f"Batch {batch_num}/{total_batches} ({len(batch)} entities)")

        # Number the names so the LLM can answer with index -> category.
        numbered = "\n".join(f"{j+1}. {name}" for j, name in enumerate(batch))
        prompt = f"""Classify each entity name into exactly one category.
Categories: person, organization, company, project, technology, location, event, concept, product

If a name looks like a person's first name only (e.g. "sarah", "thomas"), classify as person.
If it's a common word that isn't clearly an entity (e.g. "ahnung", "wir", "evtl", "schau"), classify as concept.
If unsure, classify as concept.

Respond with ONLY a JSON object mapping the number to the category. Example:
{{"1": "person", "2": "company", "3": "concept"}}

Entities:
{numbered}

JSON:"""

        try:
            response = ollama_generate(prompt)
            # Extract JSON from response (LLM may wrap it in prose).
            start = response.find("{")
            end = response.rfind("}") + 1
            if start >= 0 and end > start:
                parsed = json.loads(response[start:end])
                for idx_str, category in parsed.items():
                    idx = int(idx_str) - 1  # LLM indices are 1-based
                    if 0 <= idx < len(batch):
                        cat = category.strip().lower()
                        # Only accept categories from the whitelist.
                        if cat in VALID_TYPES:
                            results[batch[idx]] = cat
        except Exception as e:
            # A failed batch is skipped entirely; remaining batches still run.
            log.warning(f"Batch {batch_num} failed: {e}")
            continue

        time.sleep(0.5)  # Be nice to Ollama

    # Apply results
    stats = defaultdict(int)  # "old → new" transition -> count, for logging
    for name, new_type in results.items():
        old_type = entities[name].get("type", "unknown")
        if old_type != new_type:
            stats[f"{old_type} → {new_type}"] += 1
            if not dry_run:
                entities[name]["type"] = new_type
                # Provenance: record that the cleanup LLM set this type, and when.
                entities[name]["classified_by"] = "llm_cleanup"
                entities[name]["classified_at"] = datetime.now().isoformat()

    log.info(f"Classified {len(results)}/{len(unknowns)} unknowns:")
    for transition, count in sorted(stats.items(), key=lambda x: -x[1]):
        log.info(f"  {transition}: {count}")

    # NOTE(review): in dry-run mode entities are unmodified, so this count
    # reflects the pre-cleanup state rather than the would-be remainder.
    remaining = sum(1 for k, v in entities.items()
                    if isinstance(v, dict) and v.get("type") == "unknown")
    log.info(f"Remaining unknowns: {remaining}")

    return entities
|
||||
|
||||
|
||||
# ─── Task 2: Deduplicate ─────────────────────────────────────────────────────
|
||||
|
||||
def find_duplicates(entities: dict) -> list:
    """Find duplicate entity groups via case-insensitive matching."""
    # Bucket names that normalize to the same lowercase form.
    by_normalized = defaultdict(list)
    for name in entities:
        by_normalized[name.strip().lower()].append(name)

    # Also check for substring containment (e.g. "mondo gate" vs "mondo gate ag").
    lowered = {name: name.strip().lower() for name in entities}
    by_length = sorted(lowered.items(), key=lambda item: len(item[1]))

    # Pairs where the shorter name is a word/hyphen prefix of the longer one.
    prefix_pairs = []
    for idx, (name_a, low_a) in enumerate(by_length):
        if len(low_a) < 3:
            continue  # too short to be a meaningful prefix
        for name_b, low_b in by_length[idx + 1:]:
            if low_a == low_b:
                continue
            if low_b.startswith(low_a + " ") or low_b.startswith(low_a + "-"):
                prefix_pairs.append((name_a, name_b))

    # Start with exact case-insensitive duplicates as merge groups.
    merge_groups = [names for names in by_normalized.values() if len(names) > 1]

    # Fold prefix matches into an existing group, or open a new one.
    for shorter, longer in prefix_pairs:
        for group in merge_groups:
            if shorter in group or longer in group:
                if shorter not in group:
                    group.append(shorter)
                if longer not in group:
                    group.append(longer)
                break
        else:
            merge_groups.append([shorter, longer])

    return merge_groups
|
||||
|
||||
|
||||
def pick_canonical(names: list, entities: dict) -> str:
    """Pick the most detailed entity name as canonical."""
    # Rank by: has any uppercase letter, then field count, then name length.
    def rank(candidate):
        entry = entities.get(candidate, {})
        field_count = len(entry) if isinstance(entry, dict) else 0
        has_capital = int(any(ch.isupper() for ch in candidate))
        return (has_capital, field_count, len(candidate))

    return max(names, key=rank)
|
||||
|
||||
|
||||
def deduplicate(entities: dict, relationships: dict, dry_run: bool = False) -> tuple:
    """Deduplicate entities and update relationships.

    Merges each duplicate group (from ``find_duplicates``) into the canonical
    name (from ``pick_canonical``), records the losers as aliases, and remaps
    relationship endpoints accordingly, merging relationships that collide
    after remapping.

    Args:
        entities: Entity name -> record mapping (mutated in place unless dry_run).
        relationships: "a::b" key -> relationship record mapping.
        dry_run: When True, only log what would happen.

    Returns:
        Tuple of (entities, relationships) after merging.
    """
    groups = find_duplicates(entities)

    if not groups:
        log.info("No duplicates found.")
        return entities, relationships

    log.info(f"Found {len(groups)} duplicate groups:")

    alias_map = {}  # old_name → canonical_name

    for group in groups:
        canonical = pick_canonical(group, entities)
        aliases = [n for n in group if n != canonical]

        if not aliases:
            continue

        log.info(f"  Canonical: '{canonical}' ← aliases: {aliases}")

        for alias in aliases:
            alias_map[alias] = canonical

        if not dry_run:
            # Merge fields into canonical
            canonical_entry = entities.get(canonical, {})
            if not isinstance(canonical_entry, dict):
                canonical_entry = {}

            existing_aliases = canonical_entry.get("aliases", [])
            for alias in aliases:
                if alias not in existing_aliases:
                    existing_aliases.append(alias)
                alias_entry = entities.get(alias, {})
                if isinstance(alias_entry, dict):
                    # Merge non-existing fields (canonical's values win;
                    # type/aliases are managed explicitly, never copied)
                    for k, v in alias_entry.items():
                        if k not in canonical_entry and k not in ("type", "aliases"):
                            canonical_entry[k] = v

            canonical_entry["aliases"] = existing_aliases
            entities[canonical] = canonical_entry

            # Remove aliases from entities
            for alias in aliases:
                if alias in entities:
                    del entities[alias]

    # Update relationships
    if not dry_run and alias_map:
        updated_rels = {}
        remapped = 0
        for key, rel in relationships.items():
            a = rel.get("a", "")
            b = rel.get("b", "")
            new_a = alias_map.get(a, a)
            new_b = alias_map.get(b, b)

            if new_a != a or new_b != b:
                remapped += 1
                rel["a"] = new_a
                rel["b"] = new_b

            # Key follows the "a::b" convention used throughout the store.
            new_key = f"{new_a}::{new_b}"

            if new_key in updated_rels:
                # Merge: sum counts, keep latest last_seen
                existing = updated_rels[new_key]
                existing["count"] = existing.get("count", 0) + rel.get("count", 0)
                # ISO-8601 timestamps compare correctly as strings.
                if rel.get("last_seen", "") > existing.get("last_seen", ""):
                    existing["last_seen"] = rel["last_seen"]
                if rel.get("first_seen", "") < existing.get("first_seen", ""):
                    existing["first_seen"] = rel["first_seen"]
                # Merge types
                existing_types = set(existing.get("types", []))
                existing_types.update(rel.get("types", []))
                existing["types"] = list(existing_types)
            else:
                updated_rels[new_key] = rel

        log.info(f"Remapped {remapped} relationships, merged {len(relationships) - len(updated_rels)} duplicates")
        relationships = updated_rels

    log.info(f"Merged {len(alias_map)} aliases into {len(set(alias_map.values()))} canonical entities")

    return entities, relationships
|
||||
|
||||
|
||||
# ─── Task 3: Relationship Scoring ────────────────────────────────────────────
|
||||
|
||||
def score_relationships(relationships: dict, dry_run: bool = False) -> dict:
    """Add strength scores and decay old relationships.

    strength = 0.4 * count (log-scaled) + 0.3 * type diversity + 0.3 * recency.
    Relationships scoring below 0.1 are removed (unless ``dry_run``).

    Args:
        relationships: "a::b" key -> relationship record mapping (mutated in
            place unless dry_run).
        dry_run: When True, compute and log but do not mutate.

    Returns:
        The (possibly pruned and score-annotated) relationships mapping.
    """
    import math  # fix: was re-imported inside the loop on every iteration

    now = datetime.now()

    removed = 0
    scored = 0
    decayed = 0

    to_remove = []  # keys to delete after iteration (can't mutate while iterating)

    for key, rel in relationships.items():
        count = rel.get("count", 1)
        last_seen_str = rel.get("last_seen", "")
        types = rel.get("types", [])

        # Base strength from count (log scale, capped at 1)
        count_score = min(1.0, math.log(count + 1) / math.log(100))

        # Context diversity: more relationship types = stronger
        diversity_score = min(1.0, len(types) * 0.3)

        # Recency: full score within 30 days, then linear decay over ~6 months
        recency_score = 1.0
        if last_seen_str:
            try:
                last_seen = datetime.fromisoformat(last_seen_str)
                days_ago = (now - last_seen).days
                if days_ago > 30:
                    recency_score = max(0.0, 1.0 - (days_ago - 30) / 180)
                    decayed += 1
            except (ValueError, TypeError):
                # Unparseable timestamp: treat as fully recent.
                pass

        # Combined strength
        strength = round(
            count_score * 0.4 + diversity_score * 0.3 + recency_score * 0.3,
            3
        )

        if strength < 0.1:
            to_remove.append(key)
            removed += 1
        else:
            if not dry_run:
                rel["strength"] = strength
            scored += 1

    if not dry_run:
        for key in to_remove:
            del relationships[key]

    log.info(f"Scored {scored} relationships, decayed {decayed}, removed {removed} (strength < 0.1)")

    return relationships
|
||||
|
||||
|
||||
# ─── Main ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """CLI entry point: run the selected cleanup steps against the knowledge store.

    With no step flags, runs classify → dedupe → score in order. Unless
    --dry-run is given, both JSON files are backed up before any mutation and
    written back atomically afterwards.
    """
    parser = argparse.ArgumentParser(description="Knowledge graph cleanup")
    parser.add_argument("--classify", action="store_true", help="Classify unknown entities")
    parser.add_argument("--dedupe", action="store_true", help="Deduplicate entities")
    parser.add_argument("--score", action="store_true", help="Score relationships")
    parser.add_argument("--dry-run", action="store_true", help="Show changes without writing")
    args = parser.parse_args()

    # If no specific flags, run all
    run_all = not (args.classify or args.dedupe or args.score)

    entities = load_entities()
    relationships = load_relationships()

    log.info(f"Loaded {len(entities)} entities, {len(relationships)} relationships")

    # Backup before any modifications
    if not args.dry_run:
        backup(ENTITIES_PATH)
        backup(RELATIONSHIPS_PATH)

    if args.dry_run:
        log.info("═══ DRY RUN — no files will be modified ═══")

    # Steps run in fixed order; each passes dry_run through so nothing is
    # mutated during a preview run.
    if run_all or args.classify:
        log.info("─── Step 1: Classify Unknowns ───")
        entities = classify_unknowns(entities, dry_run=args.dry_run)

    if run_all or args.dedupe:
        log.info("─── Step 2: Deduplicate Entities ───")
        entities, relationships = deduplicate(entities, relationships, dry_run=args.dry_run)

    if run_all or args.score:
        log.info("─── Step 3: Score Relationships ───")
        relationships = score_relationships(relationships, dry_run=args.dry_run)

    if not args.dry_run:
        # Atomic writes so readers never observe a half-written store.
        atomic_write(ENTITIES_PATH, entities)
        atomic_write(RELATIONSHIPS_PATH, relationships)
        log.info(f"Done. Final: {len(entities)} entities, {len(relationships)} relationships")
    else:
        log.info(f"Dry run complete. Would result in: {len(entities)} entities, {len(relationships)} relationships")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Configure root logging once when run as a script, then run the cleanup.
    logging.basicConfig(
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
        level=logging.INFO,
    )
    main()
|
||||
214
cortex/intelligence/llm_extractor.py
Normal file
214
cortex/intelligence/llm_extractor.py
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
LLM-Powered Entity Extractor — Uses Ollama for Named Entity Recognition.
|
||||
|
||||
Standalone module. No pip dependencies beyond stdlib.
|
||||
Calls Ollama HTTP API with structured NER prompts.
|
||||
|
||||
Configuration via environment variables:
|
||||
DARKPLEX_OLLAMA_URL — Ollama base URL (default: http://localhost:11434)
|
||||
DARKPLEX_OLLAMA_MODEL — Model name (default: llama3.2:1b)
DARKPLEX_OLLAMA_TIMEOUT — Timeout in seconds (default: 30)
|
||||
DARKPLEX_EXTRACTOR — llm|regex|auto (default: auto)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
log = logging.getLogger("llm-extractor")
|
||||
|
||||
OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
|
||||
OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "llama3.2:1b")
|
||||
OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30"))
|
||||
|
||||
VALID_TYPES = {"person", "organization", "company", "project", "technology",
|
||||
"location", "event", "concept", "product"}
|
||||
|
||||
NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object.
|
||||
Each key is the entity name (lowercase), each value has "type" and "context".
|
||||
|
||||
Valid types: person, organization, company, project, technology, location, event, concept, product
|
||||
|
||||
Rules:
|
||||
- Skip common/generic words (the, system, message, etc.)
|
||||
- Entity names should be lowercase, use hyphens for multi-word
|
||||
- "context" is a 2-5 word description of the entity's role in the text
|
||||
- If no entities found, return empty JSON object
|
||||
- Return ONLY valid JSON, no explanation
|
||||
|
||||
Text:
|
||||
{text}
|
||||
|
||||
JSON:"""
|
||||
|
||||
BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object.
|
||||
Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context".
|
||||
|
||||
Valid types: person, organization, company, project, technology, location, event, concept, product
|
||||
|
||||
Rules:
|
||||
- Skip common/generic words
|
||||
- "context" is a 2-5 word description
|
||||
- If no entities found, return empty JSON object
|
||||
- Return ONLY valid JSON, no markdown, no explanation
|
||||
|
||||
Texts:
|
||||
{texts}
|
||||
|
||||
JSON:"""
|
||||
|
||||
|
||||
def _call_ollama(prompt: str) -> str | None:
    """Call Ollama generate API. Returns response text or None on failure."""
    body = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        # Low temperature keeps extraction output stable and parseable.
        "options": {"temperature": 0.1, "num_predict": 1024},
    }

    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=OLLAMA_TIMEOUT) as resp:
            payload = json.loads(resp.read().decode())
        return payload.get("response", "")
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        log.warning(f"Ollama call failed: {e}")
        return None
    except Exception as e:
        log.warning(f"Ollama unexpected error: {e}")
        return None
|
||||
|
||||
|
||||
def _parse_json_response(text: str) -> dict:
|
||||
"""Extract JSON dict from LLM response, handling markdown fences etc."""
|
||||
if not text:
|
||||
return {}
|
||||
# Strip markdown code fences
|
||||
text = text.strip()
|
||||
if text.startswith("```"):
|
||||
lines = text.split("\n")
|
||||
lines = [l for l in lines if not l.strip().startswith("```")]
|
||||
text = "\n".join(lines)
|
||||
|
||||
# Find the JSON object
|
||||
start = text.find("{")
|
||||
if start == -1:
|
||||
return {}
|
||||
|
||||
# Find matching closing brace
|
||||
depth = 0
|
||||
for i in range(start, len(text)):
|
||||
if text[i] == "{":
|
||||
depth += 1
|
||||
elif text[i] == "}":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
try:
|
||||
return json.loads(text[start:i + 1])
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _normalize_entities(raw: dict) -> dict:
    """Normalize and validate extracted entities.

    - Lowercases names and hyphenates separators; drops names shorter than 2
      or longer than 80 characters, and entries whose value is not a dict.
    - Maps common type aliases onto VALID_TYPES; anything else becomes "concept".
    - Truncates context to 100 chars; non-string contexts become "".

    Returns:
        {name: {"type": ..., "context": ..., "match": "llm"}}
    """
    # Alias table hoisted out of the loop (was rebuilt on every entity).
    type_aliases = {"org": "organization", "tech": "technology", "loc": "location",
                    "place": "location", "tool": "technology", "framework": "technology",
                    "language": "technology", "app": "product", "software": "product",
                    "service": "product", "group": "organization", "team": "organization"}

    result = {}
    for name, info in raw.items():
        if not isinstance(info, dict):
            continue
        name = name.strip().lower().replace("_", "-").replace(" ", "-")
        if len(name) < 2 or len(name) > 80:
            continue

        # str() guards against LLMs emitting non-string "type" values,
        # which previously raised AttributeError on .lower().
        etype = str(info.get("type", "unknown")).lower().strip()
        if etype not in VALID_TYPES:
            etype = type_aliases.get(etype, "concept")

        context = info.get("context", "")
        context = context[:100] if isinstance(context, str) else ""

        result[name] = {"type": etype, "context": context, "match": "llm"}

    return result
|
||||
|
||||
|
||||
def extract_entities_llm(text: str) -> dict[str, dict] | None:
    """
    Extract entities from text using Ollama LLM.

    Returns dict of {name: {type, context, match}} or None if LLM unavailable.
    None signals caller to fall back to regex.
    """
    if not text or len(text) < 10:
        return {}

    # Keep the prompt bounded for small local models.
    bounded = text[:2000] if len(text) > 2000 else text

    response = _call_ollama(NER_PROMPT.format(text=bounded))
    if response is None:
        return None  # Signal fallback

    return _normalize_entities(_parse_json_response(response))
|
||||
|
||||
|
||||
def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None:
    """
    Extract entities from multiple texts in one LLM call.

    Returns combined dict or None if LLM unavailable.
    """
    if not texts:
        return {}

    # Keep only usable texts, each capped at 500 chars.
    usable = [t[:500] for t in texts if t and len(t) >= 10]
    if not usable:
        return {}

    # Limit batch size to keep the prompt reasonable.
    usable = usable[:10]

    listing = "\n".join(f"[{i+1}] {t}" for i, t in enumerate(usable))
    response = _call_ollama(BATCH_PROMPT.format(texts=listing))
    if response is None:
        return None

    return _normalize_entities(_parse_json_response(response))
|
||||
|
||||
|
||||
def is_available() -> bool:
    """Check if Ollama is reachable."""
    try:
        probe = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET")
        with urllib.request.urlopen(probe, timeout=3) as resp:
            return resp.status == 200
    except Exception:
        # Any failure (connection refused, DNS, timeout) means "not available".
        return False
|
||||
830
cortex/intelligence/loop.py
Normal file
830
cortex/intelligence/loop.py
Normal file
|
|
@ -0,0 +1,830 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Darkplex Loop — The single heartbeat of the intelligence pipeline.
|
||||
|
||||
One process. One loop. One state machine.
|
||||
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
|
||||
|
||||
Each cycle:
|
||||
1. INGEST — Fetch new events from NATS (batch consumer pull)
|
||||
2. EXTRACT — Pull entities and relationships from events
|
||||
3. BRIDGE — Sync cortex outputs to knowledge engine
|
||||
4. VERIFY — Check that real output was produced
|
||||
5. REPORT — Update state, alert on failure
|
||||
|
||||
States:
|
||||
RUNNING — Everything nominal
|
||||
DEGRADED — A step failed, but loop continues with recovery attempts
|
||||
EMERGENCY — Critical failure, alerting
|
||||
|
||||
Usage:
|
||||
darkplex loop # Run loop (default: 1h cycle)
|
||||
darkplex loop --once # Single cycle, then exit
|
||||
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
|
||||
darkplex loop --status # Print current state and exit
|
||||
darkplex loop --check # Check for new events, exit 0=new 1=none
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import urllib.request
|
||||
from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# ── Paths (configurable via env) ─────────────────────────────────────────────
|
||||
|
||||
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
|
||||
SCRIPT_DIR = BASE_DIR / "scripts"
|
||||
LEVEL4_DIR = SCRIPT_DIR / "level4"
|
||||
LOG_DIR = BASE_DIR / "logs"
|
||||
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"
|
||||
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
|
||||
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
|
||||
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
|
||||
|
||||
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
|
||||
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
|
||||
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))
|
||||
DEFAULT_CYCLE_SECONDS = 3600 # 1 hour
|
||||
ALERT_COOLDOWN = 3600 # 1 alert per hour max
|
||||
|
||||
log = logging.getLogger("darkplex-loop")
|
||||
|
||||
|
||||
# ── State Machine ────────────────────────────────────────────────────────────
|
||||
|
||||
class LoopState:
    """Persistent state for the Darkplex Loop.

    Serialized to STATE_FILE as JSON via ``__dict__`` — every public attribute
    set in __init__ is part of the on-disk format, so renaming/removing one
    changes what is persisted and reloaded.
    """

    def __init__(self):
        self.status = "INIT"            # RUNNING | DEGRADED | EMERGENCY | INIT
        self.cycle_count = 0            # total cycles attempted (success or failure)
        self.last_cycle = None          # ISO timestamp of the most recent cycle
        self.last_success = None        # ISO timestamp of last successful cycle
        self.last_failure = None        # ISO timestamp of last failed cycle
        self.last_alert = None          # ISO timestamp of last alert sent
        self.consecutive_failures = 0   # resets on success; >=3 triggers EMERGENCY
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}                 # per-step results of the last successful cycle
        self.error = None               # "step: error" string of the last failure
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self.quality_metrics = {}  # {unknown_rate, llm_success_rate, avg_entities_per_event}
        self.quality_history = []  # last 10: [{cycle, unknown_rate, llm_success_rate}]
        self.ollama_status = "unknown"  # healthy|degraded|down
        self._load()

    def _load(self):
        """Restore persisted state; unknown keys in the file are ignored."""
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                # Only restore attributes this version knows about.
                if hasattr(self, k):
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            # First run or corrupt state file: start from defaults.
            pass

    def save(self):
        """Persist the full state (all instance attributes) to STATE_FILE."""
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        # default=str handles datetimes or any other non-JSON value.
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle."""
        self.perf = perf
        # Include unknown_rate in perf_history if available
        if self.quality_metrics:
            perf["unknown_rate"] = self.quality_metrics.get("unknown_rate", 0)
        self.perf_history.append(perf)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over last 10 cycles."""
        if not self.perf_history:
            return {}
        # Key set is taken from the oldest entry in the window.
        keys = self.perf_history[0].keys()
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / len(self.perf_history)) for k in keys}

    def record_success(self, step_results: dict):
        """Mark the current cycle as successful and persist state."""
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        """Mark the current cycle as failed; escalate to EMERGENCY after 3 in a row."""
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """Return True if the ALERT_COOLDOWN window since the last alert has passed."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            # Unparseable timestamp: err on the side of alerting.
            return True

    def mark_alerted(self):
        """Record that an alert was just sent (starts the cooldown window)."""
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
|
||||
|
||||
|
||||
# ── Pipeline Steps ───────────────────────────────────────────────────────────
|
||||
|
||||
def _nats_cmd():
    """Base NATS CLI invocation, honoring NATS_BIN/NATS_URL env overrides."""
    cmd = [os.environ.get("NATS_BIN", "nats")]
    url = os.environ.get("NATS_URL", "")
    if url:
        cmd += ["-s", url]
    return cmd
|
||||
|
||||
|
||||
def check_new_events() -> int:
    """Return number of pending events in the consumer. 0 = nothing new."""
    # Any failure (CLI missing, timeout, bad JSON, nonzero exit) maps to -1.
    cmd = _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return -1
        return json.loads(proc.stdout).get("num_pending", 0)
    except Exception as e:
        log.warning(f"check_new_events failed: {e}")
        return -1
|
||||
|
||||
|
||||
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns a dict with:
        events        — list of parsed event payloads
        total_scanned — messages fetched (including parse failures)
        skipped       — messages dropped (JSON parse errors)
        skip_reason   — present only when the cycle was skipped early

    Falls back to _step_ingest_sequential() if the batch pull fails or
    times out.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")

    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"

    # Check how many pending. -1 means the check itself failed; in that case
    # we still attempt a full-sized batch pull (best effort).
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")

    events = []
    total_fetched = 0
    parse_errors = 0

    # Fetch one batch, capped at NATS_BATCH_SIZE.
    # (Collapsed the previous redundant double min(..., NATS_BATCH_SIZE).)
    batch_size = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)

        # --raw emits one message payload per line; tolerate junk lines.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1

    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)

    # Update sequence tracking (get current stream seq from consumer info)
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        # Best effort — losing the marker only causes a rescan by the fallback.
        log.warning("Could not save last processed sequence")

    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
|
||||
|
||||
|
||||
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Walks stream sequence numbers one at a time, starting after the last
    processed sequence but never more than NATS_BATCH_SIZE back from the
    stream head. Keeps only conversation_message_in payloads, then persists
    the new high-water mark.
    """
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")

    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        # Corrupt/missing tracking file — fall back to scanning the last window.
        pass

    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        # Cannot even read stream info — give up quietly for this cycle.
        return {"events": [], "total_scanned": 0, "skipped": 0}

    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    # Bound the scan window to NATS_BATCH_SIZE messages per cycle.
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)

    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                # Sequence may have been purged or the CLI hiccuped — skip it.
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            # Only conversation events are of interest here.
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            # NATS CLI returns the payload base64-encoded in --json mode.
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1

    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        # Best effort — losing the marker only causes a rescan next cycle.
        pass

    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
|
||||
|
||||
|
||||
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Uses LLM batch extraction (Ollama) when available and enabled via
    DARKPLEX_EXTRACTOR ("llm" or "auto"), falling back to the regex-based
    extractor in entity-manager.py otherwise. Updates the entities and
    relationships JSON files on disk and the per-cycle counters on *state*.

    Returns {"extracted", "new_entities", "new_relationships"} counts.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")

    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}

    # entity-manager.py has a dash in its name, so it is loaded by path
    # rather than imported normally.
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)

    # Try LLM batch extraction first
    # NOTE(review): bare import relies on LEVEL4_DIR having been inserted
    # into sys.path above — confirm llm_extractor lives there and is not
    # meant to be imported as cortex.intelligence.llm_extractor.
    from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")

    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)

    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")

    # Prepare texts for potential batch LLM processing
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        # Payload text may arrive as a list of segments (dicts or strings);
        # flatten it into a single string before scoring.
        if isinstance(text, list):
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))

    # LLM batch extraction for qualifying texts (cap at 50 to keep cycle time reasonable)
    llm_results = {}
    if llm_ok:
        # Only texts at or above the 0.4 importance threshold qualify,
        # highest-importance first.
        batch_texts = [t for t, s in sorted(
            [(t, s) for t, s in event_texts if t and s >= 0.4],
            key=lambda x: -x[1]  # highest importance first
        )][:50]
        if batch_texts:
            consecutive_fails = 0
            # Send texts in chunks of 10; bail out to regex after 3 failed chunks.
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
            if llm_results:
                log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")

    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        # Skip low-importance events entirely.
        if not text or score < 0.4:
            continue

        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Add LLM entities that appear in this text
            text_lower = text.lower()
            for name, info in llm_results.items():
                # Match hyphenated/spaced/joined variants of the entity name.
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue

        total_extracted += len(found)
        names = list(found.keys())

        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                # Make the new entity immediately matchable for later events
                # in this same cycle.
                known[name] = entities[name]

        # Record co-occurrence relationships between entities found in the
        # same event; each entity pairs with at most the next 4 names to
        # bound the O(n^2) blow-up on entity-dense events.
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    # Canonical key ordering so (A, B) and (B, A) collapse.
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1

    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)

    # Refresh per-cycle counters used by verification and status reporting.
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)

    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
|
||||
|
||||
|
||||
def step_bridge(state: LoopState) -> dict:
    """Step 3: Run knowledge bridge."""
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")

    script = SCRIPT_DIR / "knowledge-bridge.py"
    if not script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}

    proc = subprocess.run(
        [sys.executable, str(script), "sync"],
        capture_output=True, text=True, timeout=120,
    )

    if proc.returncode != 0:
        log.warning(f"BRIDGE: Failed — {proc.stderr[:200]}")
        return {"status": "failed", "error": proc.stderr[:200]}

    # Tally "<n> new/bridged/added" counts reported on stdout.
    pattern = re.compile(r"(\d+)\s+(?:new|bridged|added)", re.I)
    total = 0
    for line in proc.stdout.split("\n"):
        match = pattern.search(line)
        if match:
            total += int(match.group(1))

    log.info(f"BRIDGE: {total} items bridged")
    return {"status": "ok", "bridged": total}
|
||||
|
||||
|
||||
def _check_quality(state: LoopState, extract_result: dict) -> list:
    """Check entity quality metrics. Returns list of issues/warnings.

    Side effects: updates state.quality_metrics and appends to the rolling
    10-entry state.quality_history (the caller is expected to persist state).
    """
    issues = []

    # Load entities and compute unknown_rate
    try:
        entities = json.loads(ENTITIES_FILE.read_text()) if ENTITIES_FILE.exists() else {}
    except (json.JSONDecodeError, OSError):
        entities = {}

    total = len(entities)
    unknown_count = sum(1 for e in entities.values() if e.get("type") == "unknown")
    unknown_rate = (unknown_count / total * 100) if total > 0 else 0.0

    # `or 1` guards against division by zero when no events were processed.
    events_processed = state.events_processed_last or 1
    extracted = extract_result.get("extracted", 0)
    avg_entities_per_event = extracted / events_processed if events_processed > 0 else 0.0

    # Estimate LLM success rate from extraction (if LLM was used, new_entities > 0 is a proxy)
    llm_success_rate = 100.0  # default if no LLM used
    # We track this per-cycle based on whether extraction produced results.
    # NOTE: this is a coarse proxy — a busy cycle yielding zero entities is
    # treated as a total LLM failure.
    if events_processed > 10 and extracted == 0:
        llm_success_rate = 0.0

    state.quality_metrics = {
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
        "avg_entities_per_event": round(avg_entities_per_event, 2),
    }

    # More than 30% untyped entities indicates classification is struggling.
    if unknown_rate > 30:
        issues.append(f"High unknown entity rate: {unknown_rate:.1f}% ({unknown_count}/{total})")

    # Track quality history and detect trends
    state.quality_history.append({
        "cycle": state.cycle_count + 1,
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
    })
    state.quality_history = state.quality_history[-10:]  # keep last 10

    # Check if unknown_rate rising 3 cycles in a row (strictly monotonic)
    if len(state.quality_history) >= 3:
        last3 = [h["unknown_rate"] for h in state.quality_history[-3:]]
        if last3[0] < last3[1] < last3[2]:
            issues.append(f"Entity quality degrading — unknown_rate rising: {last3}")

    log.info(f"VERIFY/QUALITY: unknown_rate={unknown_rate:.1f}%, avg_entities/event={avg_entities_per_event:.2f}")
    return issues
|
||||
|
||||
|
||||
def _check_ollama(state: LoopState) -> list:
    """Check Ollama health. Returns list of issues.

    Probes the local Ollama API (/api/tags) and sets state.ollama_status to
    "healthy", "degraded" (server up but the configured model is missing),
    or "down" (unreachable).
    """
    issues = []
    # Model override order: DARKPLEX_OLLAMA_MODEL wins, then OLLAMA_MODEL.
    model = os.environ.get("DARKPLEX_OLLAMA_MODEL", os.environ.get("OLLAMA_MODEL", ""))

    try:
        req = urllib.request.Request("http://localhost:11434/api/tags", method="GET")
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = json.loads(resp.read())
        models = [m.get("name", "") for m in data.get("models", [])]
        # Substring match allows tag variants like "llama3:8b" vs "llama3".
        if model and not any(model in m for m in models):
            state.ollama_status = "degraded"
            issues.append(f"Ollama up but model '{model}' not loaded (available: {models[:5]})")
            log.warning(f"VERIFY/OLLAMA: degraded — model '{model}' not in {models[:5]}")
        else:
            state.ollama_status = "healthy"
            log.info(f"VERIFY/OLLAMA: healthy ({len(models)} models)")
    except Exception as e:
        # Any failure (connection refused, timeout, bad JSON) counts as down.
        state.ollama_status = "down"
        issues.append(f"Ollama down: {e}")
        log.warning(f"VERIFY/OLLAMA: down — {e}")

    return issues
|
||||
|
||||
|
||||
def _check_performance(state: LoopState) -> list:
    """Check for performance regressions. Returns a list of issue strings."""
    problems = []

    # Need at least two cycles of history before comparisons are meaningful.
    if len(state.perf_history) < 2:
        return problems

    current = state.perf
    averages = state.perf_averages()
    curr_total = current.get("total_ms", 0)
    avg_total = averages.get("total_ms", 0)

    # Flag a cycle that took more than twice the rolling average.
    if avg_total > 0 and curr_total > 2 * avg_total:
        problems.append(f"Performance regression detected: {curr_total}ms vs avg {avg_total}ms")

    # Flag extraction exceeding the 2-minute hard ceiling.
    extract_ms = current.get("extract_ms", 0)
    if extract_ms > 120000:
        problems.append(f"Extraction too slow: {extract_ms}ms (>2min)")

    if not problems:
        log.info(f"VERIFY/PERF: OK (total={curr_total}ms, avg={avg_total}ms)")
    else:
        for problem in problems:
            log.warning(f"VERIFY/PERF: {problem}")

    return problems
|
||||
|
||||
|
||||
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: Verify output quality.

    Checks knowledge-file integrity, extraction yield, NATS reachability,
    entity quality, Ollama health, and performance regressions.

    Returns {"verdict": "PASS"|"FAIL", "issues": [str, ...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")

    issues = []

    # File integrity checks
    for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]:
        if not f.exists():
            issues.append(f"{label} file missing")
        else:
            try:
                data = json.loads(f.read_text())
                if not data:
                    issues.append(f"{label} file is empty")
            except json.JSONDecodeError:
                issues.append(f"{label} file is corrupt JSON")

    # A busy cycle that extracted nothing usually means extraction is broken.
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")

    # NATS check — use _nats_cmd() so NATS_BIN/NATS_URL env overrides are
    # honored, consistent with every other NATS call in this module
    # (previously probed a bare "nats", ignoring the configured server).
    try:
        r = subprocess.run(_nats_cmd() + ["stream", "ls", "--json"], capture_output=True, text=True, timeout=10)
        if r.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")

    # New monitoring checks: quality, Ollama, performance.
    issues.extend(_check_quality(state, extract_result))
    issues.extend(_check_ollama(state))
    issues.extend(_check_performance(state))

    verdict = "PASS" if not issues else "FAIL"
    log.info(f"VERIFY: {verdict} — {len(issues)} issues")
    for issue in issues:
        log.warning(f"  ⚠ {issue}")

    return {"verdict": verdict, "issues": issues}
|
||||
|
||||
|
||||
def step_report(state: LoopState, verify_result: dict):
    """Step 5: Alert if degraded/emergency.

    No-ops when healthy or while within the alert cooldown. Sends the alert
    via vera-alert.py (best effort), drops a flag file for external monitors,
    and records the alert time to start a new cooldown.
    """
    if state.status == "RUNNING":
        return

    if not state.can_alert():
        log.info("REPORT: Alert cooldown active, skipping")
        return

    severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
    msg = (
        f"Darkplex Loop {severity}\n"
        f"Consecutive failures: {state.consecutive_failures}\n"
        f"Error: {state.error}\n"
        f"Issues: {', '.join(verify_result.get('issues', []))}"
    )

    log.warning(f"REPORT: Sending alert — {state.status}")

    try:
        # Use the running interpreter (consistent with step_bridge) rather
        # than whatever "python3" resolves to on PATH — matters in a venv.
        subprocess.run(
            [sys.executable, str(SCRIPT_DIR / "vera-alert.py"), msg],
            capture_output=True, text=True, timeout=15,
        )
    except Exception:
        # Alerting is best-effort; never let it crash the loop.
        pass

    flag = LOG_DIR / "darkplex-loop-alert.flag"
    flag.write_text(f"{datetime.now().isoformat()} {state.status}: {state.error}")
    state.mark_alerted()
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _importance(text: str) -> float:
    """Importance scoring for event text, clamped to [0.0, 1.0]."""
    if not text:
        return 0.0

    lower = text.lower()
    score = 0.3

    # Longer messages carry more signal.
    score += 0.1 * ((len(text) > 200) + (len(text) > 500))

    # Capitalized words are a rough proxy for named entities.
    cap_words = len(re.findall(r"\b[A-Z][a-z]+\b", text))
    score += 0.1 * ((cap_words > 3) + (cap_words > 8))

    # Penalize routine operational noise.
    for noise in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
        if noise.lower() in lower:
            score -= 0.3

    # Reward business-relevant vocabulary.
    for keyword in ["meeting", "project", "company", "contract", "decision", "strategy",
                    "budget", "deadline", "milestone", "partnership", "investment", "revenue",
                    "client", "proposal", "agreement"]:
        if keyword in lower:
            score += 0.05

    return max(0.0, min(1.0, score))
|
||||
|
||||
|
||||
def print_status():
    """Print current loop state."""
    state = LoopState()

    def _count(path):
        # Best-effort count of entries in a JSON file; 0 on any failure.
        try:
            return len(json.loads(path.read_text()))
        except Exception:
            return 0

    ent_count = _count(ENTITIES_FILE)
    rel_count = _count(RELATIONSHIPS_FILE)

    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "⚪")
    print(f"{icon} Status: {state.status}")
    print(f"Cycles: {state.cycle_count}")
    print(f"Last cycle: {state.last_cycle or 'never'}")
    print(f"Last success: {state.last_success or 'never'}")
    print(f"Last failure: {state.last_failure or 'never'}")
    print(f"Failures: {state.consecutive_failures}")
    print(f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)")
    print(f"Relationships:{rel_count} total")
    if state.quality_metrics:
        qm = state.quality_metrics
        print(f"Quality: unknown_rate={qm.get('unknown_rate', '?')}% llm_success={qm.get('llm_success_rate', '?')}% avg_ent/event={qm.get('avg_entities_per_event', '?')}")
    print(f"Ollama: {state.ollama_status}")
    if state.perf:
        print(f"Last perf: {state.perf}")
    if state.error:
        print(f"Error: {state.error}")
|
||||
|
||||
|
||||
# ── Main Loop ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _ms_since(t0: float) -> int:
    """Elapsed milliseconds since monotonic timestamp t0 (truncated toward zero)."""
    elapsed = time.monotonic() - t0
    return int(elapsed * 1000)
|
||||
|
||||
|
||||
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success.

    Pipeline: ingest -> extract -> bridge -> verify -> (report on failure).
    Per-step wall-clock timings are recorded into state.perf; a structural
    verify failure (broken/missing/corrupt) fails the cycle, while other
    verify issues are logged but the cycle still counts as a success.
    """
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()

    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}

        # Early skip if no new events
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True

        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract

        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge

        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify

        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)

        # Only structural problems fail the cycle; soft quality/perf issues
        # are surfaced by verify but do not count as a failure.
        if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]):
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False

        state.record_success(step_results)
        avgs = state.perf_averages()
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status} — {perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f"  Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")

        # A healthy cycle clears any standing alert flag left by step_report.
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()

        return True

    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Infer the failed step: the first pipeline stage with no recorded result.
        step_name = "unknown"
        for name in ["ingest", "extract", "bridge", "verify"]:
            if name not in step_results:
                step_name = name
                break
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
|
||||
|
||||
|
||||
def main():
    """CLI entry point for `darkplex loop`.

    Flags:
        --status     print current state and exit
        --check      exit 0 if new events are pending, 1 if none, 2 on error
        --once       run a single cycle instead of looping
        --cycle N    seconds between cycles (default DEFAULT_CYCLE_SECONDS)
    """
    # The log directory must exist BEFORE logging is configured:
    # logging.FileHandler opens its file at construction time and raises
    # if the directory is missing, so the previous ordering crashed on a
    # fresh install.
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )

    args = sys.argv[1:]

    if "--status" in args:
        print_status()
        return

    if "--check" in args:
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)

    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS

    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])

    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")

    running = True

    def handle_signal(sig, frame):
        # Graceful shutdown: finish the current cycle, then exit the loop.
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    while running:
        run_cycle(state)

        if once:
            break

        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1-second slices so a shutdown signal is honored promptly.
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)

    log.info("Darkplex Loop stopped")
|
||||
152
cortex/intelligence/shared_memory.py
Normal file
152
cortex/intelligence/shared_memory.py
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
"""Cross-Agent Memory Bus: NATS pub/sub for agent insights.
|
||||
|
||||
Agents publish insights (observations, learned facts, warnings) to the bus.
|
||||
Other agents subscribe to topics relevant to their function.
|
||||
|
||||
⚠️ DATA ISOLATION: Only Vainplex-internal agents participate.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Callable, Awaitable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222")
|
||||
|
||||
# Only these agents are allowed to participate in shared memory
|
||||
ALLOWED_AGENTS: set[str] = set(
|
||||
os.environ.get("INTELLIGENCE_ALLOWED_AGENTS", "claudia,vera,stella,viola").split(",")
|
||||
)
|
||||
|
||||
INSIGHT_SUBJECT_PREFIX = "darkplex.intelligence.insights"
|
||||
|
||||
|
||||
@dataclass
class Insight:
    """An agent insight to be shared across the memory bus."""

    agent: str
    topic: str
    content: str
    confidence: float = 0.8  # 0.0-1.0
    tags: list[str] = field(default_factory=list)
    timestamp: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Stamp with the current UTC time unless the caller supplied one.
        self.timestamp = self.timestamp or datetime.now(timezone.utc).isoformat()

    def to_json(self) -> str:
        """Serialize to a JSON string (field order matches declaration order)."""
        return json.dumps(vars(self))

    @classmethod
    def from_json(cls, data: str) -> Insight:
        """Rebuild an Insight from a JSON string produced by to_json()."""
        return cls(**json.loads(data))
|
||||
|
||||
|
||||
InsightHandler = Callable[[Insight], Awaitable[None]]
|
||||
|
||||
|
||||
class SharedMemory:
|
||||
"""Cross-agent memory bus using NATS pub/sub.
|
||||
|
||||
Usage:
|
||||
memory = SharedMemory(agent_name="claudia")
|
||||
await memory.connect()
|
||||
await memory.publish(Insight(agent="claudia", topic="infra", content="..."))
|
||||
await memory.subscribe("infra", handler)
|
||||
|
||||
⚠️ Enforces data isolation: only allowed agents can publish/subscribe.
|
||||
"""
|
||||
|
||||
def __init__(self, agent_name: str, nats_url: str | None = None) -> None:
|
||||
if agent_name not in ALLOWED_AGENTS:
|
||||
raise ValueError(
|
||||
f"Agent '{agent_name}' is not allowed in shared memory. "
|
||||
f"Allowed: {ALLOWED_AGENTS}"
|
||||
)
|
||||
self.agent_name = agent_name
|
||||
self.nats_url = nats_url or NATS_URL
|
||||
self._nats_client: Any = None
|
||||
self._subscriptions: list[Any] = []
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Connect to the NATS server."""
|
||||
try:
|
||||
import nats
|
||||
self._nats_client = await nats.connect(self.nats_url)
|
||||
logger.info("SharedMemory connected for agent '%s'", self.agent_name)
|
||||
except Exception:
|
||||
logger.exception("Failed to connect SharedMemory to NATS")
|
||||
raise
|
||||
|
||||
async def publish(self, insight: Insight) -> None:
|
||||
"""Publish an insight to the memory bus.
|
||||
|
||||
Args:
|
||||
insight: The insight to share. Agent field must match this instance's agent.
|
||||
"""
|
||||
if not self._nats_client:
|
||||
raise RuntimeError("Not connected. Call connect() first.")
|
||||
|
||||
if insight.agent not in ALLOWED_AGENTS:
|
||||
raise ValueError(f"Agent '{insight.agent}' not allowed to publish insights")
|
||||
|
||||
subject = f"{INSIGHT_SUBJECT_PREFIX}.{insight.topic}"
|
||||
await self._nats_client.publish(subject, insight.to_json().encode())
|
||||
logger.debug(
|
||||
"Published insight: %s/%s by %s", insight.topic, insight.content[:50], insight.agent
|
||||
)
|
||||
|
||||
async def subscribe(self, topic: str, handler: InsightHandler) -> None:
|
||||
"""Subscribe to insights on a topic.
|
||||
|
||||
Args:
|
||||
topic: Topic to subscribe to (supports NATS wildcards).
|
||||
handler: Async callback for received insights.
|
||||
"""
|
||||
if not self._nats_client:
|
||||
raise RuntimeError("Not connected. Call connect() first.")
|
||||
|
||||
subject = f"{INSIGHT_SUBJECT_PREFIX}.{topic}"
|
||||
|
||||
async def _message_handler(msg: Any) -> None:
|
||||
try:
|
||||
insight = Insight.from_json(msg.data.decode())
|
||||
if insight.agent not in ALLOWED_AGENTS:
|
||||
logger.warning(
|
||||
"Ignoring insight from non-allowed agent: %s", insight.agent
|
||||
)
|
||||
return
|
||||
await handler(insight)
|
||||
except Exception:
|
||||
logger.exception("Error handling insight message")
|
||||
|
||||
sub = await self._nats_client.subscribe(subject, cb=_message_handler)
|
||||
self._subscriptions.append(sub)
|
||||
logger.info("Subscribed to insights: %s", subject)
|
||||
|
||||
async def close(self) -> None:
    """Tear down every active subscription, then drop the NATS connection."""
    for active in list(self._subscriptions):
        await active.unsubscribe()
    self._subscriptions.clear()
    if self._nats_client:
        await self._nats_client.close()
        self._nats_client = None
|
||||
193
cortex/intelligence/temporal.py
Normal file
193
cortex/intelligence/temporal.py
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
"""Temporal Context API: chronological knowledge retrieval.
|
||||
|
||||
Queries NATS events and ChromaDB with a time dimension to answer:
|
||||
"What do we know about X, chronologically?"
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default config from environment
|
||||
NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222")
|
||||
CHROMADB_URL = os.environ.get("CHROMADB_URL", "http://localhost:8000")
|
||||
|
||||
|
||||
@dataclass
class TemporalEntry:
    """A knowledge entry with temporal metadata."""

    # When the underlying event/document was recorded.
    timestamp: datetime
    # Which backend produced this entry.
    source: str  # "nats" or "chromadb"
    # Topic the entry was retrieved under (echoes the query topic).
    topic: str
    # Raw textual content of the event or document.
    content: str
    # Backend-specific extras (e.g. NATS subject, ChromaDB metadata).
    metadata: dict[str, Any] = field(default_factory=dict)
    # Semantic relevance; only populated for ChromaDB results (raw distance),
    # NATS entries keep the 0.0 default.
    relevance_score: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class TemporalQuery:
    """Query parameters for temporal context retrieval."""

    # Topic/search text; used as a NATS subject token and as the
    # ChromaDB semantic query string.
    topic: str
    # Inclusive time window bounds; None means unbounded on that side.
    start_time: datetime | None = None
    end_time: datetime | None = None
    # Maximum number of entries returned (applied after merging sources).
    limit: int = 50
    # Which backends to consult; both by default.
    sources: list[str] = field(default_factory=lambda: ["nats", "chromadb"])
|
||||
|
||||
|
||||
class TemporalContext:
    """Retrieves chronological knowledge from NATS events and ChromaDB.

    Usage:
        ctx = TemporalContext()
        entries = await ctx.query(TemporalQuery(topic="ssl-cert"))
    """

    def __init__(
        self,
        nats_url: str | None = None,
        chromadb_url: str | None = None,
    ) -> None:
        # Fall back to the env-derived module defaults when not given.
        self.nats_url = nats_url or NATS_URL
        self.chromadb_url = chromadb_url or CHROMADB_URL
        # Lazily populated by connect(); None means "backend unavailable".
        self._nats_client: Any = None
        self._chroma_client: Any = None

    async def connect(self) -> None:
        """Initialize connections to NATS and ChromaDB."""
        # Each backend is optional: a failure is logged but does not raise,
        # so query() degrades to whichever sources actually connected.
        try:
            import nats
            self._nats_client = await nats.connect(self.nats_url)
            logger.info("Connected to NATS: %s", self.nats_url)
        except Exception:
            logger.exception("Failed to connect to NATS")

        try:
            import chromadb
            self._chroma_client = chromadb.HttpClient(host=self.chromadb_url)
            logger.info("Connected to ChromaDB: %s", self.chromadb_url)
        except Exception:
            logger.exception("Failed to connect to ChromaDB")

    async def query(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query temporal context across configured sources.

        Returns entries sorted chronologically (oldest first).
        """
        entries: list[TemporalEntry] = []

        # A source is consulted only if requested AND its client connected.
        if "nats" in query.sources and self._nats_client:
            nats_entries = await self._query_nats(query)
            entries.extend(nats_entries)

        if "chromadb" in query.sources and self._chroma_client:
            chroma_entries = self._query_chromadb(query)
            entries.extend(chroma_entries)

        # Sort chronologically
        entries.sort(key=lambda e: e.timestamp)

        # Apply limit
        # NOTE(review): the limit is applied after the chronological sort, so
        # it keeps the OLDEST entries when both sources over-fill — confirm
        # that is the intended truncation direction.
        if query.limit:
            entries = entries[:query.limit]

        return entries

    async def _query_nats(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query NATS JetStream for historical events matching the topic."""
        entries: list[TemporalEntry] = []
        try:
            js = self._nats_client.jetstream()
            subject = f"darkplex.*.{query.topic}.>"

            # Get messages from the stream
            # NOTE(review): this subscription is never unsubscribed on the
            # success path, and the loop only ends when `limit` messages
            # arrive — it may block if the stream holds fewer; verify.
            sub = await js.subscribe(subject, ordered_consumer=True)
            count = 0
            async for msg in sub.messages:
                if count >= query.limit:
                    break

                # Timestamp from the Nats-Time-Stamp header — presumably an
                # epoch-seconds value; falls back to 0 (epoch) when missing.
                timestamp = datetime.fromtimestamp(
                    msg.headers.get("Nats-Time-Stamp", 0) if msg.headers else 0,
                    tz=timezone.utc,
                )

                # Drop messages outside the requested window.
                if query.start_time and timestamp < query.start_time:
                    continue
                if query.end_time and timestamp > query.end_time:
                    continue

                entries.append(TemporalEntry(
                    timestamp=timestamp,
                    source="nats",
                    topic=query.topic,
                    content=msg.data.decode() if msg.data else "",
                    metadata={"subject": msg.subject},
                ))
                count += 1

        except Exception:
            logger.exception("NATS temporal query failed for topic: %s", query.topic)

        return entries

    def _query_chromadb(self, query: TemporalQuery) -> list[TemporalEntry]:
        """Query ChromaDB for semantically relevant entries with time filtering."""
        entries: list[TemporalEntry] = []
        try:
            collection = self._chroma_client.get_or_create_collection("darkplex_knowledge")

            # Build a `where` filter: single-clause filters are plain dicts,
            # two time bounds must be combined with ChromaDB's "$and".
            where_filter: dict[str, Any] = {}
            if query.start_time:
                where_filter["timestamp"] = {"$gte": query.start_time.isoformat()}
            if query.end_time:
                if "timestamp" in where_filter:
                    where_filter = {
                        "$and": [
                            {"timestamp": {"$gte": query.start_time.isoformat()}},
                            {"timestamp": {"$lte": query.end_time.isoformat()}},
                        ]
                    }
                else:
                    where_filter["timestamp"] = {"$lte": query.end_time.isoformat()}

            results = collection.query(
                query_texts=[query.topic],
                n_results=query.limit,
                where=where_filter if where_filter else None,
            )

            # Results are nested per-query-text; we sent one text, so
            # index [0] everywhere.
            if results and results.get("documents"):
                for i, doc in enumerate(results["documents"][0]):
                    meta = results["metadatas"][0][i] if results.get("metadatas") else {}
                    ts_str = meta.get("timestamp", "")
                    try:
                        ts = datetime.fromisoformat(ts_str)
                    except (ValueError, TypeError):
                        # Unparseable/missing timestamp: fall back to "now"
                        # so the entry still sorts somewhere.
                        ts = datetime.now(timezone.utc)

                    entries.append(TemporalEntry(
                        timestamp=ts,
                        source="chromadb",
                        topic=query.topic,
                        content=doc,
                        metadata=meta,
                        # NOTE(review): ChromaDB "distances" are raw distances
                        # (lower = more similar), stored unconverted here.
                        relevance_score=results["distances"][0][i] if results.get("distances") else 0.0,
                    ))

        except Exception:
            logger.exception("ChromaDB temporal query failed for topic: %s", query.topic)

        return entries

    async def close(self) -> None:
        """Close connections."""
        # Only NATS holds a live connection; the ChromaDB HTTP client
        # has no close semantics here.
        if self._nats_client:
            await self._nats_client.close()
|
||||
345
cortex/knowledge_extractor.py
Executable file
345
cortex/knowledge_extractor.py
Executable file
|
|
@ -0,0 +1,345 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Smart Extractor — Extract entities from NATS events and update knowledge graph.
|
||||
Part of Level 4.4 AGI Roadmap.
|
||||
|
||||
Usage:
|
||||
smart-extractor.py --last 100 — Process last N events
|
||||
smart-extractor.py --since 6h — Process events from last 6 hours
|
||||
smart-extractor.py --dry-run — Show what would be extracted without saving
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import subprocess
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Import entity-manager functions
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from importlib import import_module
|
||||
|
||||
SCRIPT_DIR = Path(__file__).parent
|
||||
LOG_DIR = Path.home() / "clawd" / "logs"
|
||||
LOG_FILE = LOG_DIR / "entity-extraction.log"
|
||||
KNOWLEDGE_DIR = Path.home() / ".cortex" / "knowledge"
|
||||
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
|
||||
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
|
||||
NATS_STREAM = "openclaw-events"
|
||||
CONSUMER_NAME = "kg-extractor-temp"
|
||||
|
||||
# Setup logging
|
||||
LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler(LOG_FILE),
|
||||
logging.StreamHandler(),
|
||||
],
|
||||
)
|
||||
log = logging.getLogger("smart-extractor")
|
||||
|
||||
|
||||
def load_json(path):
    """Read a JSON file, returning {} when it is missing or unparseable."""
    try:
        handle = open(path)
    except FileNotFoundError:
        return {}
    with handle:
        try:
            return json.load(handle)
        except json.JSONDecodeError:
            return {}
|
||||
|
||||
|
||||
def save_json(path, data):
    """Write *data* as pretty-printed UTF-8 JSON, creating parent dirs.

    *path* must be a pathlib.Path (its .parent is used for mkdir).
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def importance_heuristic(text):
    """Score how "important" a text snippet looks, clamped to [0, 1].

    Combines length, capitalized-word density, noise markers, and
    business vocabulary. Empty/falsy input scores 0.0.
    """
    if not text:
        return 0.0

    score = 0.3  # baseline for any non-empty text

    # Longer messages tend to carry more substance.
    if len(text) > 200:
        score += 0.1
    if len(text) > 500:
        score += 0.1

    # Capitalized words are a cheap proxy for named entities.
    capitalized = len(re.findall(r"\b[A-Z][a-z]+\b", text))
    if capitalized > 3:
        score += 0.1
    if capitalized > 8:
        score += 0.1

    lowered = text.lower()

    # Heartbeat/cron chatter is noise; each marker found costs 0.3.
    for marker in ("HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"):
        if marker.lower() in lowered:
            score -= 0.3

    # Business/project vocabulary nudges the score up per hit.
    for keyword in ("meeting", "project", "company", "contract", "decision",
                    "strategy", "budget", "deadline", "milestone", "partnership",
                    "investment", "revenue", "client", "proposal", "agreement"):
        if keyword in lowered:
            score += 0.05

    return max(0.0, min(1.0, score))
|
||||
|
||||
|
||||
def fetch_events_nats(last=None, since=None):
    """Fetch conversation events from NATS via the `nats` CLI.

    Args:
        last: Approximate number of events to fetch (default 500 when
            neither argument is given by the caller).
        since: Duration string ('6h', '1d', '30m') limiting event age.

    Returns:
        List of decoded event dicts; empty on any CLI/stream failure.
    """
    events = []

    # Create a temporary pull consumer
    # NOTE(review): filter_subj is defined but never used below — the
    # subject filter is applied per-message instead; candidate for removal.
    filter_subj = "openclaw.events.main.conversation_message_in"

    # Use direct stream get instead of consumer (more reliable)
    try:
        # Get stream info for sequence range
        info_result = subprocess.run(
            ["nats", "stream", "info", NATS_STREAM, "--json"],
            capture_output=True, text=True, timeout=10
        )
        if info_result.returncode != 0:
            log.error("Failed to get stream info")
            return events

        info = json.loads(info_result.stdout)
        end_seq = info["state"]["last_seq"]
        start_seq = info["state"]["first_seq"]

        # Calculate range
        count = last or 500
        if since:
            # Estimate start sequence from time: assume messages arrive at a
            # uniform rate over the stream's lifetime, then over-fetch by 20%
            # (the *1.2 factor) to compensate for the estimate's error.
            ms_since = parse_since(since) * 1000
            total_ms = (time.time() * 1000) - (datetime.fromisoformat(info["state"]["first_ts"].replace("Z", "+00:00")).timestamp() * 1000)
            total_msgs = end_seq - start_seq
            msgs_per_ms = total_msgs / total_ms if total_ms > 0 else 1
            fetch_start = max(start_seq, int(end_seq - ms_since * msgs_per_ms * 1.2))
        else:
            fetch_start = max(start_seq, end_seq - count)

        # Only fetch conversation messages
        log.info(f"Fetching sequences {fetch_start} - {end_seq}")
        # Sample roughly `count` sequences evenly across the range rather
        # than fetching every message.
        step = max(1, (end_seq - fetch_start) // count)

        for seq in range(fetch_start, end_seq + 1, step):
            try:
                result = subprocess.run(
                    ["nats", "stream", "get", NATS_STREAM, str(seq), "--json"],
                    capture_output=True, text=True, timeout=5
                )
                if result.returncode != 0:
                    continue
                msg = json.loads(result.stdout)
                subj = msg.get("subject", "")
                if "conversation_message_in" not in subj:
                    continue
                import base64
                # Input validation: max size check (1MB)
                raw_data = msg.get("data", "")
                if len(raw_data) > 1_048_576:
                    log.warning("Skipping oversized message at seq %d (%d bytes)", seq, len(raw_data))
                    continue
                # Payload arrives base64-encoded from the CLI's JSON output.
                try:
                    decoded = base64.b64decode(raw_data)
                except Exception as e:
                    log.warning("Invalid base64 at seq %d: %s", seq, e)
                    continue
                try:
                    data = json.loads(decoded.decode("utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError) as e:
                    log.warning("Invalid JSON at seq %d: %s", seq, e)
                    continue
                if not isinstance(data, dict):
                    log.warning("Expected dict at seq %d, got %s", seq, type(data).__name__)
                    continue
                events.append(data)
            except Exception:
                # Best-effort sampling: any per-sequence failure is skipped.
                continue

        log.info(f"Fetched {len(events)} conversation events")

    except subprocess.TimeoutExpired:
        log.warning("NATS command timed out")
    except FileNotFoundError:
        log.warning("nats CLI not found — skipping NATS extraction")

    # Filter by time if --since specified
    # Events carry a millisecond "timestamp" field — presumably epoch ms;
    # compare in seconds against the parse_since() cutoff.
    if since and events:
        cutoff = parse_since(since)
        if cutoff:
            events = [e for e in events if e.get("timestamp", 0) / 1000 >= cutoff]

    return events
|
||||
|
||||
|
||||
def parse_since(since_str):
    """Convert a duration like '6h', '1d', '30m' into an epoch cutoff.

    Returns time.time() minus the duration in seconds, or None when the
    string does not start with <digits><h|d|m>.
    """
    parsed = re.match(r"(\d+)([hdm])", since_str)
    if parsed is None:
        return None
    amount = int(parsed.group(1))
    per_unit = {"h": 3600, "d": 86400, "m": 60}[parsed.group(2)]
    return time.time() - amount * per_unit
|
||||
|
||||
|
||||
def extract_from_event(event, known_entities):
    """Extract entities from a single event.

    Args:
        event: Decoded NATS event dict; text is read from
            payload.text_preview (preferred) or payload.text.
        known_entities: Mapping of already-known entity names, passed
            through to entity_manager.extract_entities.

    Returns:
        Tuple (found_entities_dict, importance_score). The dict is empty
        when there is no text or the score falls below the 0.4 threshold.
    """
    # Import extract_entities from entity_manager
    # The module is loaded by file path (it lives next to this script and
    # is not a regular package import) and cached in sys.modules so the
    # load happens at most once per process.
    em = sys.modules.get("entity_manager_mod")
    if not em:
        # Load entity-manager module
        spec_path = Path(__file__).parent / "entity_manager.py"
        import importlib.util
        spec = importlib.util.spec_from_file_location("entity_manager_mod", spec_path)
        em = importlib.util.module_from_spec(spec)
        sys.modules["entity_manager_mod"] = em
        spec.loader.exec_module(em)

    # Normalize the event text: lists are joined, anything else stringified.
    payload = event.get("payload", {})
    text = payload.get("text_preview", "") or payload.get("text", "")
    if isinstance(text, list):
        text = " ".join(str(t) for t in text)
    if not isinstance(text, str):
        text = str(text)

    if not text:
        return {}, 0.0

    # Gate extraction on the cheap heuristic to skip noise/heartbeats.
    score = importance_heuristic(text)
    if score < 0.4:
        return {}, score

    found = em.extract_entities(text, known_entities)
    return found, score
|
||||
|
||||
|
||||
def run_extraction(last=None, since=None, dry_run=False):
    """Main extraction pipeline.

    Fetches events from NATS, extracts entities per event, records
    co-occurrence relationships, and persists the updated knowledge
    graph (unless dry_run).

    Args:
        last: Max number of recent events to process.
        since: Duration string limiting event age (e.g. '6h').
        dry_run: When True, nothing is written to disk.
    """
    log.info(f"Starting extraction (last={last}, since={since}, dry_run={dry_run})")

    # Load known entities
    # entity_manager is loaded by file path (sits next to this script,
    # not importable as a package) and registered in sys.modules so
    # extract_from_event() reuses the same module instance.
    spec_path = Path(__file__).parent / "entity_manager.py"
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager_mod", spec_path)
    em = importlib.util.module_from_spec(spec)
    sys.modules["entity_manager_mod"] = em
    spec.loader.exec_module(em)

    known = em.load_known_entities()
    log.info(f"Loaded {len(known)} known entities")

    # Fetch events
    events = fetch_events_nats(last=last, since=since)
    log.info(f"Fetched {len(events)} events from NATS")

    if not events:
        log.info("No events to process")
        return

    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)

    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    # One shared timestamp for everything written this run.
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")

    for event in events:
        found, score = extract_from_event(event, known)
        if not found:
            continue

        total_extracted += len(found)
        names = list(found.keys())

        # Add new entities
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "nats-extraction",
                    "first_seen": ts_now,
                }
                new_entities += 1
                # Keep `known` in sync so later events in this run see it.
                known[name] = entities[name]

        # Create co-occurrence relationships between entities found in same message
        # Keys are "a::b" with a/b in lexicographic order so each pair maps
        # to one canonical key regardless of discovery order.
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):  # limit pairs
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b,
                            "types": ["co-occurrence"],
                            "count": 1,
                            "first_seen": ts_now,
                            "last_seen": ts_now,
                        }
                        new_relationships += 1

        # Checkpoint every ~50 extracted entities so a crash mid-run
        # loses at most one batch of progress.
        if not dry_run and total_extracted % 50 == 0 and total_extracted > 0:
            # Periodic save
            em.save_json(ENTITIES_FILE, entities)
            em.save_json(RELATIONSHIPS_FILE, relationships)

    if not dry_run:
        em.save_json(ENTITIES_FILE, entities)
        em.save_json(RELATIONSHIPS_FILE, relationships)

    log.info(
        f"Done: {len(events)} events processed, {total_extracted} entities extracted, "
        f"{new_entities} new entities, {new_relationships} new relationships"
    )
    print(
        f"\nResults: {len(events)} events → {total_extracted} entities extracted, "
        f"{new_entities} new, {new_relationships} new relationships"
    )
|
||||
|
||||
|
||||
def main():
    """Parse CLI flags (--last N, --since DUR, --dry-run) and run extraction."""
    last = None
    since = None
    dry_run = False

    argv = sys.argv[1:]
    idx = 0
    while idx < len(argv):
        flag = argv[idx]
        if flag == "--last" and idx + 1 < len(argv):
            last = int(argv[idx + 1])
            idx += 2
        elif flag == "--since" and idx + 1 < len(argv):
            since = argv[idx + 1]
            idx += 2
        elif flag == "--dry-run":
            dry_run = True
            idx += 1
        else:
            # Unknown flag or missing value: show usage and bail.
            print(__doc__)
            sys.exit(1)

    if last is None and since is None:
        last = 100  # default window when neither flag was given

    run_extraction(last=last, since=since, dry_run=dry_run)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
214
cortex/llm_extractor.py
Normal file
214
cortex/llm_extractor.py
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
LLM-Powered Entity Extractor — Uses Ollama for Named Entity Recognition.
|
||||
|
||||
Standalone module. No pip dependencies beyond stdlib.
|
||||
Calls Ollama HTTP API with structured NER prompts.
|
||||
|
||||
Configuration via environment variables:
|
||||
DARKPLEX_OLLAMA_URL — Ollama base URL (default: http://localhost:11434)
|
||||
DARKPLEX_OLLAMA_MODEL — Model name (default: mistral:7b)
|
||||
DARKPLEX_OLLAMA_TIMEOUT — Timeout in seconds (default: 10)
|
||||
DARKPLEX_EXTRACTOR — llm|regex|auto (default: auto)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
log = logging.getLogger("llm-extractor")
|
||||
|
||||
OLLAMA_URL = os.environ.get("DARKPLEX_OLLAMA_URL", "http://localhost:11434")
|
||||
OLLAMA_MODEL = os.environ.get("DARKPLEX_OLLAMA_MODEL", "mistral:7b")
|
||||
OLLAMA_TIMEOUT = int(os.environ.get("DARKPLEX_OLLAMA_TIMEOUT", "30"))
|
||||
|
||||
VALID_TYPES = {"person", "organization", "company", "project", "technology",
|
||||
"location", "event", "concept", "product"}
|
||||
|
||||
NER_PROMPT = """Extract all named entities from the text below. Return ONLY a JSON object.
|
||||
Each key is the entity name (lowercase), each value has "type" and "context".
|
||||
|
||||
Valid types: person, organization, company, project, technology, location, event, concept, product
|
||||
|
||||
Rules:
|
||||
- Skip common/generic words (the, system, message, etc.)
|
||||
- Entity names should be lowercase, use hyphens for multi-word
|
||||
- "context" is a 2-5 word description of the entity's role in the text
|
||||
- If no entities found, return empty JSON object
|
||||
- Return ONLY valid JSON, no explanation
|
||||
|
||||
Text:
|
||||
{text}
|
||||
|
||||
JSON:"""
|
||||
|
||||
BATCH_PROMPT = """Extract all named entities from these texts. Return ONLY a JSON object.
|
||||
Each key is the entity name (lowercase, hyphens for spaces), each value has "type" and "context".
|
||||
|
||||
Valid types: person, organization, company, project, technology, location, event, concept, product
|
||||
|
||||
Rules:
|
||||
- Skip common/generic words
|
||||
- "context" is a 2-5 word description
|
||||
- If no entities found, return empty JSON object
|
||||
- Return ONLY valid JSON, no markdown, no explanation
|
||||
|
||||
Texts:
|
||||
{texts}
|
||||
|
||||
JSON:"""
|
||||
|
||||
|
||||
def _call_ollama(prompt: str) -> str | None:
    """POST a prompt to the Ollama generate endpoint.

    Returns the model's response text, or None on any failure so the
    caller can fall back to regex extraction.
    """
    body = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        # Low temperature for deterministic extraction; cap output length.
        "options": {"temperature": 0.1, "num_predict": 1024},
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=OLLAMA_TIMEOUT) as resp:
            parsed = json.loads(resp.read().decode())
            return parsed.get("response", "")
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        log.warning(f"Ollama call failed: {e}")
        return None
    except Exception as e:
        log.warning(f"Ollama unexpected error: {e}")
        return None
|
||||
|
||||
|
||||
def _parse_json_response(text: str) -> dict:
|
||||
"""Extract JSON dict from LLM response, handling markdown fences etc."""
|
||||
if not text:
|
||||
return {}
|
||||
# Strip markdown code fences
|
||||
text = text.strip()
|
||||
if text.startswith("```"):
|
||||
lines = text.split("\n")
|
||||
lines = [l for l in lines if not l.strip().startswith("```")]
|
||||
text = "\n".join(lines)
|
||||
|
||||
# Find the JSON object
|
||||
start = text.find("{")
|
||||
if start == -1:
|
||||
return {}
|
||||
|
||||
# Find matching closing brace
|
||||
depth = 0
|
||||
for i in range(start, len(text)):
|
||||
if text[i] == "{":
|
||||
depth += 1
|
||||
elif text[i] == "}":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
try:
|
||||
return json.loads(text[start:i + 1])
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _normalize_entities(raw: dict) -> dict:
    """Validate and canonicalize LLM-extracted entities.

    Lowercases and hyphenates names, rejects names shorter than 2 or
    longer than 80 chars, maps type aliases onto VALID_TYPES (defaulting
    to "concept"), and truncates context to 100 chars.
    """
    alias_map = {"org": "organization", "tech": "technology", "loc": "location",
                 "place": "location", "tool": "technology", "framework": "technology",
                 "language": "technology", "app": "product", "software": "product",
                 "service": "product", "group": "organization", "team": "organization"}

    normalized = {}
    for raw_name, info in raw.items():
        if not isinstance(info, dict):
            continue

        canonical = raw_name.strip().lower().replace("_", "-").replace(" ", "-")
        if not (2 <= len(canonical) <= 80):
            continue

        etype = info.get("type", "unknown").lower().strip()
        if etype not in VALID_TYPES:
            # Unknown types fall back to "concept" unless aliased.
            etype = alias_map.get(etype, "concept")

        context = info.get("context", "")
        context = context[:100] if isinstance(context, str) else ""

        normalized[canonical] = {"type": etype, "context": context, "match": "llm"}

    return normalized
|
||||
|
||||
|
||||
def extract_entities_llm(text: str) -> dict[str, dict] | None:
    """
    Extract entities from a single text using the Ollama LLM.

    Returns {name: {type, context, match}} on success, {} for trivial
    input, or None when the LLM is unreachable so the caller can fall
    back to regex extraction.
    """
    if not text or len(text) < 10:
        return {}

    # Bound the prompt size for small local models.
    snippet = text[:2000]

    response = _call_ollama(NER_PROMPT.format(text=snippet))
    if response is None:
        return None  # Signal fallback

    return _normalize_entities(_parse_json_response(response))
|
||||
|
||||
|
||||
def extract_entities_llm_batch(texts: list[str]) -> dict[str, dict] | None:
    """
    Extract entities from several texts in a single Ollama call.

    Returns the combined entity dict, {} when nothing is usable, or
    None when the LLM is unavailable (caller falls back to regex).
    """
    if not texts:
        return {}

    # Drop trivial snippets and cap each at 500 chars.
    usable = [t[:500] for t in texts if t and len(t) >= 10]
    if not usable:
        return {}

    # Cap the batch so the combined prompt stays reasonable.
    usable = usable[:10]

    listing = "\n".join(f"[{i+1}] {t}" for i, t in enumerate(usable))
    response = _call_ollama(BATCH_PROMPT.format(texts=listing))
    if response is None:
        return None

    return _normalize_entities(_parse_json_response(response))
|
||||
|
||||
|
||||
def is_available() -> bool:
    """Probe the Ollama /api/tags endpoint; True only on an HTTP 200."""
    probe = urllib.request.Request(f"{OLLAMA_URL}/api/tags", method="GET")
    try:
        with urllib.request.urlopen(probe, timeout=3) as resp:
            return resp.status == 200
    except Exception:
        return False
|
||||
701
cortex/loop.py
Normal file
701
cortex/loop.py
Normal file
|
|
@ -0,0 +1,701 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Darkplex Loop — The single heartbeat of the intelligence pipeline.
|
||||
|
||||
One process. One loop. One state machine.
|
||||
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
|
||||
|
||||
Each cycle:
|
||||
1. INGEST — Fetch new events from NATS (batch consumer pull)
|
||||
2. EXTRACT — Pull entities and relationships from events
|
||||
3. BRIDGE — Sync cortex outputs to knowledge engine
|
||||
4. VERIFY — Check that real output was produced
|
||||
5. REPORT — Update state, alert on failure
|
||||
|
||||
States:
|
||||
RUNNING — Everything nominal
|
||||
DEGRADED — A step failed, but loop continues with recovery attempts
|
||||
EMERGENCY — Critical failure, alerting
|
||||
|
||||
Usage:
|
||||
darkplex loop # Run loop (default: 1h cycle)
|
||||
darkplex loop --once # Single cycle, then exit
|
||||
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
|
||||
darkplex loop --status # Print current state and exit
|
||||
darkplex loop --check # Check for new events, exit 0=new 1=none
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# ── Paths (configurable via env) ─────────────────────────────────────────────
|
||||
|
||||
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
|
||||
SCRIPT_DIR = BASE_DIR / "scripts"
|
||||
LEVEL4_DIR = SCRIPT_DIR / "level4"
|
||||
LOG_DIR = BASE_DIR / "logs"
|
||||
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"
|
||||
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
|
||||
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
|
||||
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
|
||||
|
||||
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
|
||||
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
|
||||
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))
|
||||
DEFAULT_CYCLE_SECONDS = 3600 # 1 hour
|
||||
ALERT_COOLDOWN = 3600 # 1 alert per hour max
|
||||
|
||||
log = logging.getLogger("darkplex-loop")
|
||||
|
||||
|
||||
# ── State Machine ────────────────────────────────────────────────────────────
|
||||
|
||||
class LoopState:
    """Persistent state for the Darkplex Loop.

    State is serialized as the instance __dict__ to STATE_FILE, so any
    attribute added here is automatically persisted and restored.
    """

    def __init__(self):
        # Current machine state: INIT/RUNNING/DEGRADED/EMERGENCY.
        self.status = "INIT"
        self.cycle_count = 0
        # ISO-8601 timestamps of notable events (None until they occur).
        self.last_cycle = None
        self.last_success = None
        self.last_failure = None
        self.last_alert = None
        # Failures in a row; 3+ escalates DEGRADED -> EMERGENCY.
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        # Per-step results of the most recent cycle.
        self.steps = {}
        self.error = None
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        # Restore any previously persisted state over the defaults above.
        self._load()

    def _load(self):
        # Missing or corrupt state file is not an error — start fresh.
        # Only known attributes are restored, so stale keys in the file
        # are silently ignored.
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                if hasattr(self, k):
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            pass

    def save(self):
        # default=str makes datetime-like values JSON-safe.
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle."""
        self.perf = perf
        self.perf_history.append(perf)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over last 10 cycles."""
        if not self.perf_history:
            return {}
        # Keys are taken from the oldest retained entry; later entries
        # missing a key contribute 0 to its average.
        keys = self.perf_history[0].keys()
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / len(self.perf_history)) for k in keys}

    def record_success(self, step_results: dict):
        # A success resets the failure streak and clears any error.
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        # Failures escalate: 1-2 in a row -> DEGRADED, 3+ -> EMERGENCY.
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        # Rate-limit alerting to one per ALERT_COOLDOWN window; an
        # unparseable last_alert timestamp fails open (alert allowed).
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            return True

    def mark_alerted(self):
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
|
||||
|
||||
|
||||
# ── Pipeline Steps ───────────────────────────────────────────────────────────
|
||||
|
||||
def _nats_cmd():
|
||||
"""Build NATS CLI base command with auth."""
|
||||
nats_bin = os.environ.get("NATS_BIN", "nats")
|
||||
nats_url = os.environ.get("NATS_URL", "")
|
||||
if nats_url:
|
||||
return [nats_bin, "-s", nats_url]
|
||||
return [nats_bin]
|
||||
|
||||
|
||||
def check_new_events() -> int:
    """Return number of pending events in the loop's consumer.

    0 means nothing new; -1 signals any CLI or parse failure.
    """
    try:
        result = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if result.returncode != 0:
            return -1
        consumer_info = json.loads(result.stdout)
        return consumer_info.get("num_pending", 0)
    except Exception as e:
        log.warning(f"check_new_events failed: {e}")
        return -1
|
||||
|
||||
|
||||
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using a single batch consumer pull.

    Returns a dict with keys ``events``, ``total_scanned``, ``skipped`` (plus
    ``skip_reason`` when the cycle is skipped because nothing is pending).
    Falls back to ``_step_ingest_sequential`` when the batch pull fails or
    times out.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")

    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"

    # How many events are waiting; -1 means the check itself failed.
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    if pending < 0:
        # Pending count unknown — still attempt a full batch; a real NATS
        # outage will make the pull fail and trigger the sequential fallback.
        log.warning("INGEST: pending count unavailable, attempting full batch")
    else:
        log.info(f"INGEST: {pending} pending events in consumer")

    events = []
    total_fetched = 0
    parse_errors = 0

    # FIX: the original clamped to NATS_BATCH_SIZE twice (a `remaining`
    # variable followed by `min(remaining, NATS_BATCH_SIZE)`); one clamp
    # is equivalent.
    batch_size = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)

        # --raw emits one JSON payload per line.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                events.append(json.loads(line))
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1

    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)

    # Persist the consumer's delivered stream sequence so the sequential
    # fallback knows where to resume on a later cycle.
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            stream_seq = json.loads(r.stdout)["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        log.warning("Could not save last processed sequence")

    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
|
||||
|
||||
|
||||
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: fetch events one-by-one via `stream get` (slow but reliable)."""
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")

    seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_seq = 0
    try:
        if seq_file.exists():
            last_seq = json.loads(seq_file.read_text()).get("last_seq", 0)
    except Exception:
        pass

    info_proc = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if info_proc.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}

    end_seq = json.loads(info_proc.stdout)["state"]["last_seq"]
    # Resume after the last processed message, but never scan more than
    # one batch worth of backlog.
    start_seq = max(last_seq + 1, end_seq - NATS_BATCH_SIZE)

    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            msg_proc = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if msg_proc.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(msg_proc.stdout)
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            # Stream-get payloads come back base64-encoded.
            events.append(json.loads(base64.b64decode(msg["data"]).decode("utf-8")))
        except Exception:
            skipped += 1

    try:
        seq_file.parent.mkdir(parents=True, exist_ok=True)
        seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass

    scanned = end_seq - start_seq + 1
    log.info(f"INGEST (sequential): {len(events)} events (scanned {scanned}, skipped {skipped})")
    return {"events": events, "total_scanned": scanned, "skipped": skipped}
|
||||
|
||||
|
||||
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events."""
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")

    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}

    # Load the entity-manager module from its hyphenated filename.
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)

    # Prefer the LLM extractor when configured and reachable.
    from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")

    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)

    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")

    # Flatten each event's text and score its importance up front.
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        if isinstance(text, list):
            text = " ".join(
                part.get("text", "") if isinstance(part, dict) else str(part)
                for part in text
            )
        if not isinstance(text, str):
            text = str(text)
        event_texts.append((text, _importance(text) if text else 0.0))

    # Run the qualifying texts through the LLM in chunks of 10; after three
    # consecutive empty batches, give up and fall back to regex extraction.
    llm_results = {}
    if llm_ok:
        batch_texts = [t for t, s in event_texts if t and s >= 0.4]
        if batch_texts:
            consecutive_fails = 0
            for start in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                batch_result = extract_entities_llm_batch(batch_texts[start:start + 10])
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
        if llm_results:
            log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")

    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        if not text or score < 0.4:
            continue

        if llm_ok and llm_results:
            # Known-entity matching, plus any LLM entities present in this text.
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            text_lower = text.lower()
            for name, info in llm_results.items():
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue

        total_extracted += len(found)
        names = list(found.keys())

        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                known[name] = entities[name]

        # Record co-occurrence edges between entities seen together
        # (each entity pairs with at most its next 4 neighbours).
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1

    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)

    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)

    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
|
||||
|
||||
|
||||
def step_bridge(state: LoopState) -> dict:
    """Step 3: Run the knowledge bridge to sync cortex outputs."""
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")

    bridge_script = SCRIPT_DIR / "knowledge-bridge.py"
    if not bridge_script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}

    result = subprocess.run(
        [sys.executable, str(bridge_script), "sync"],
        capture_output=True, text=True, timeout=120,
    )
    if result.returncode != 0:
        log.warning(f"BRIDGE: Failed — {result.stderr[:200]}")
        return {"status": "failed", "error": result.stderr[:200]}

    # Tally every "<n> new/bridged/added" count the bridge reported on stdout.
    bridged = sum(
        int(match.group(1))
        for line in result.stdout.split("\n")
        if (match := re.search(r"(\d+)\s+(?:new|bridged|added)", line, re.I))
    )

    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}
|
||||
|
||||
|
||||
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: Verify output quality.

    Checks that the knowledge files exist and parse, that extraction produced
    something when there was work to do, and that NATS is reachable.
    Returns {"verdict": "PASS"|"FAIL", "issues": [...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")

    issues = []

    # Knowledge files must exist, parse, and be non-empty.
    for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]:
        if not f.exists():
            issues.append(f"{label} file missing")
        else:
            try:
                data = json.loads(f.read_text())
                if not data:
                    issues.append(f"{label} file is empty")
            except json.JSONDecodeError:
                issues.append(f"{label} file is corrupt JSON")

    # A decent batch of events should yield at least one entity.
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")

    try:
        # FIX: this was a bare ["nats", ...] call that ignored the
        # NATS_BIN/NATS_URL environment; use _nats_cmd() like every other
        # NATS invocation in this module.
        r = subprocess.run(_nats_cmd() + ["stream", "ls", "--json"],
                           capture_output=True, text=True, timeout=10)
        if r.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")

    verdict = "PASS" if not issues else "FAIL"
    log.info(f"VERIFY: {verdict} — {len(issues)} issues")
    for issue in issues:
        log.warning(f"  ⚠ {issue}")

    return {"verdict": verdict, "issues": issues}
|
||||
|
||||
|
||||
def step_report(state: LoopState, verify_result: dict):
    """Step 5: Send an alert (subject to cooldown) when status is degraded/emergency.

    No-op while the loop is healthy. Alert delivery is best-effort: a failure
    to alert must never break the loop itself.
    """
    if state.status == "RUNNING":
        return

    if not state.can_alert():
        log.info("REPORT: Alert cooldown active, skipping")
        return

    severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
    msg = (
        f"Darkplex Loop {severity}\n"
        f"Consecutive failures: {state.consecutive_failures}\n"
        f"Error: {state.error}\n"
        f"Issues: {', '.join(verify_result.get('issues', []))}"
    )

    log.warning(f"REPORT: Sending alert — {state.status}")

    try:
        subprocess.run(
            ["python3", str(SCRIPT_DIR / "vera-alert.py"), msg],
            capture_output=True, text=True, timeout=15,
        )
    except Exception:
        pass

    # FIX: every other timestamp in this module is timezone-aware UTC; the
    # flag file previously used naive local time via datetime.now().
    flag = LOG_DIR / "darkplex-loop-alert.flag"
    flag.write_text(f"{datetime.now(timezone.utc).isoformat()} {state.status}: {state.error}")
    state.mark_alerted()
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _importance(text: str) -> float:
|
||||
"""Importance scoring for event text."""
|
||||
if not text:
|
||||
return 0.0
|
||||
score = 0.3
|
||||
if len(text) > 200: score += 0.1
|
||||
if len(text) > 500: score += 0.1
|
||||
caps = len(re.findall(r"\b[A-Z][a-z]+\b", text))
|
||||
if caps > 3: score += 0.1
|
||||
if caps > 8: score += 0.1
|
||||
for p in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
|
||||
if p.lower() in text.lower():
|
||||
score -= 0.3
|
||||
for w in ["meeting", "project", "company", "contract", "decision", "strategy",
|
||||
"budget", "deadline", "milestone", "partnership", "investment", "revenue",
|
||||
"client", "proposal", "agreement"]:
|
||||
if w in text.lower():
|
||||
score += 0.05
|
||||
return max(0.0, min(1.0, score))
|
||||
|
||||
|
||||
def print_status():
    """Print the current loop state and knowledge-graph counts to stdout."""
    state = LoopState()

    # Counts are best-effort: missing or corrupt files just report 0.
    ent_count = rel_count = 0
    try:
        ent_count = len(json.loads(ENTITIES_FILE.read_text()))
    except Exception:
        pass
    try:
        rel_count = len(json.loads(RELATIONSHIPS_FILE.read_text()))
    except Exception:
        pass

    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "⚪")
    print(f"{icon} Status: {state.status}")
    print(f"Cycles: {state.cycle_count}")
    print(f"Last cycle: {state.last_cycle or 'never'}")
    print(f"Last success: {state.last_success or 'never'}")
    print(f"Last failure: {state.last_failure or 'never'}")
    print(f"Failures: {state.consecutive_failures}")
    print(f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)")
    # FIX: was "Relationships:{n}" — missing the space after the colon.
    print(f"Relationships: {rel_count} total")
    if state.error:
        print(f"Error: {state.error}")
||||
|
||||
|
||||
# ── Main Loop ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _ms_since(t0: float) -> int:
|
||||
return int((time.monotonic() - t0) * 1000)
|
||||
|
||||
|
||||
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success."""
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()

    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}

        # Nothing pending — record timing and bail out early.
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True

        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract

        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge

        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify

        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)

        # Only hard failures (broken/missing/corrupt outputs) fail the cycle;
        # soft verify issues still count as success.
        hard_fail = verify["verdict"] == "FAIL" and any(
            marker in issue
            for issue in verify["issues"]
            for marker in ("broken", "missing", "corrupt")
        )
        if hard_fail:
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False

        state.record_success(step_results)
        avgs = state.perf_averages()
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status} — {perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f"  Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")

        # Clear any stale alert flag now that a cycle has succeeded.
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()

        return True

    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # The first pipeline step with no recorded result is where we failed.
        step_name = next(
            (name for name in ("ingest", "extract", "bridge", "verify") if name not in step_results),
            "unknown",
        )
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
|
||||
|
||||
|
||||
def main():
    """CLI entry point for `darkplex loop`.

    Flags: --status (print state and exit), --check (exit 0 if events pending,
    1 if none, 2 on error), --once (single cycle), --cycle N (seconds between
    cycles).
    """
    # FIX: LOG_DIR must exist *before* logging.basicConfig attaches a
    # FileHandler inside it, otherwise the first run on a clean machine
    # crashes with FileNotFoundError. The mkdir previously ran after.
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )

    args = sys.argv[1:]

    if "--status" in args:
        print_status()
        return

    if "--check" in args:
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)

    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS

    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])

    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")

    running = True

    def handle_signal(sig, frame):
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    while running:
        run_cycle(state)

        if once:
            break

        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1-second slices so a shutdown signal is honored promptly.
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)

    log.info("Darkplex Loop stopped")
|
||||
|
|
@ -3,9 +3,9 @@ requires = ["setuptools>=68.0", "wheel"]
|
|||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "cortex"
|
||||
version = "0.1.0"
|
||||
description = "Intelligence layer for OpenClaw — triage, health, feedback, memory hygiene, roadmap, validation"
|
||||
name = "darkplex-core"
|
||||
version = "0.2.0"
|
||||
description = "Darkplex Intelligence Layer — triage, health, feedback, governance, knowledge extraction, memory hygiene, roadmap, validation"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
license = {text = "MIT"}
|
||||
|
|
@ -15,6 +15,7 @@ authors = [
|
|||
|
||||
[project.scripts]
|
||||
cortex = "cortex.cli:main"
|
||||
darkplex = "cortex.cli:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["cortex*"]
|
||||
|
|
|
|||
106
tests/test_anticipator.py
Normal file
106
tests/test_anticipator.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
"""Tests for intelligence/anticipator module."""
|
||||
|
||||
import sys
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from cortex.intelligence.anticipator import (
|
||||
AlertSeverity,
|
||||
Anticipator,
|
||||
PatternDefinition,
|
||||
Prediction,
|
||||
_detect_recurring_errors,
|
||||
_detect_ssl_expiry,
|
||||
_detect_usage_spike,
|
||||
)
|
||||
|
||||
|
||||
class TestAnticipatorInit:
|
||||
def test_creates_with_builtin_patterns(self):
|
||||
a = Anticipator()
|
||||
assert len(a.patterns) == 3
|
||||
|
||||
def test_register_custom_pattern(self):
|
||||
a = Anticipator()
|
||||
p = PatternDefinition(name="test", description="test", detector=lambda e: None)
|
||||
a.register_pattern(p)
|
||||
assert len(a.patterns) == 4
|
||||
|
||||
|
||||
class TestAnalyze:
|
||||
def test_empty_events(self):
|
||||
a = Anticipator()
|
||||
result = a.analyze([])
|
||||
assert result == []
|
||||
|
||||
def test_no_matching_patterns(self):
|
||||
a = Anticipator()
|
||||
result = a.analyze([{"type": "unrelated", "data": {}}])
|
||||
assert result == []
|
||||
|
||||
def test_detector_exception_handled(self):
|
||||
def bad_detector(events):
|
||||
raise RuntimeError("boom")
|
||||
|
||||
a = Anticipator()
|
||||
a.patterns = [PatternDefinition(name="bad", description="", detector=bad_detector)]
|
||||
result = a.analyze([{}])
|
||||
assert result == []
|
||||
|
||||
|
||||
class TestSSLExpiry:
|
||||
def test_no_ssl_events(self):
|
||||
assert _detect_ssl_expiry([{"type": "other"}]) is None
|
||||
|
||||
def test_expiring_soon(self):
|
||||
expiry = (datetime.now(timezone.utc) + timedelta(days=5)).isoformat()
|
||||
events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
|
||||
result = _detect_ssl_expiry(events)
|
||||
assert result is not None
|
||||
assert result.severity == AlertSeverity.WARNING
|
||||
|
||||
def test_expiring_critical(self):
|
||||
expiry = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat()
|
||||
events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
|
||||
result = _detect_ssl_expiry(events)
|
||||
assert result.severity == AlertSeverity.CRITICAL
|
||||
|
||||
def test_not_expiring(self):
|
||||
expiry = (datetime.now(timezone.utc) + timedelta(days=60)).isoformat()
|
||||
events = [{"type": "ssl_cert_check", "data": {"expiry": expiry, "domain": "example.com"}}]
|
||||
assert _detect_ssl_expiry(events) is None
|
||||
|
||||
|
||||
class TestRecurringErrors:
|
||||
def test_no_errors(self):
|
||||
assert _detect_recurring_errors([]) is None
|
||||
|
||||
def test_few_errors(self):
|
||||
events = [{"type": "error", "data": {"error_type": "timeout"}}] * 2
|
||||
assert _detect_recurring_errors(events) is None
|
||||
|
||||
def test_recurring_detected(self):
|
||||
events = [{"type": "error", "data": {"error_type": "timeout"}}] * 5
|
||||
result = _detect_recurring_errors(events)
|
||||
assert result is not None
|
||||
assert result.metadata["count"] == 5
|
||||
|
||||
|
||||
class TestUsageSpike:
|
||||
def test_insufficient_data(self):
|
||||
assert _detect_usage_spike([]) is None
|
||||
|
||||
def test_normal_usage(self):
|
||||
events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(15)]
|
||||
assert _detect_usage_spike(events) is None
|
||||
|
||||
def test_spike_detected(self):
|
||||
events = [{"type": "usage_metric", "data": {"value": 10}} for _ in range(12)]
|
||||
events[-1]["data"]["value"] = 100
|
||||
events[-2]["data"]["value"] = 100
|
||||
events[-3]["data"]["value"] = 100
|
||||
result = _detect_usage_spike(events)
|
||||
assert result is not None
|
||||
112
tests/test_collective.py
Normal file
112
tests/test_collective.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
"""Tests for intelligence/collective module."""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from cortex.intelligence.shared_memory import Insight, SharedMemory, ALLOWED_AGENTS
|
||||
from cortex.intelligence.collective import AggregatedPattern, CollectiveLearning
|
||||
|
||||
|
||||
class TestCollectiveLearningInit:
|
||||
def test_init(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
assert cl._patterns == []
|
||||
assert len(cl._insights_by_topic) == 0
|
||||
|
||||
|
||||
class TestPatternDetection:
|
||||
def test_no_patterns_with_single_agent(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
# Add insights from same agent
|
||||
agent = list(ALLOWED_AGENTS)[0]
|
||||
for i in range(5):
|
||||
cl._insights_by_topic["infra"].append(
|
||||
Insight(agent=agent, topic="infra", content=f"test {i}")
|
||||
)
|
||||
cl._detect_patterns()
|
||||
assert len(cl._patterns) == 0
|
||||
|
||||
def test_pattern_with_multiple_agents(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
agents = list(ALLOWED_AGENTS)[:2]
|
||||
cl._insights_by_topic["infra"].append(
|
||||
Insight(agent=agents[0], topic="infra", content="observation 1")
|
||||
)
|
||||
cl._insights_by_topic["infra"].append(
|
||||
Insight(agent=agents[1], topic="infra", content="observation 2")
|
||||
)
|
||||
cl._detect_patterns()
|
||||
assert len(cl._patterns) == 1
|
||||
assert cl._patterns[0].topic == "infra"
|
||||
|
||||
|
||||
class TestGetPatterns:
|
||||
def test_filter_by_topic(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
agents = list(ALLOWED_AGENTS)[:2]
|
||||
for topic in ["infra", "security"]:
|
||||
for agent in agents:
|
||||
cl._insights_by_topic[topic].append(
|
||||
Insight(agent=agent, topic=topic, content="test")
|
||||
)
|
||||
cl._detect_patterns()
|
||||
assert len(cl.get_patterns(topic="infra")) == 1
|
||||
|
||||
def test_filter_by_confidence(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
agents = list(ALLOWED_AGENTS)[:2]
|
||||
cl._insights_by_topic["low"].append(
|
||||
Insight(agent=agents[0], topic="low", content="x", confidence=0.1)
|
||||
)
|
||||
cl._insights_by_topic["low"].append(
|
||||
Insight(agent=agents[1], topic="low", content="y", confidence=0.1)
|
||||
)
|
||||
cl._detect_patterns()
|
||||
assert len(cl.get_patterns(min_confidence=0.5)) == 0
|
||||
|
||||
|
||||
class TestTopicSummary:
|
||||
def test_empty(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
assert cl.get_topic_summary() == {}
|
||||
|
||||
|
||||
class TestExportKnowledge:
|
||||
def test_export_json(self):
|
||||
import json
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
data = json.loads(cl.export_knowledge())
|
||||
assert "patterns" in data
|
||||
assert "topics" in data
|
||||
assert "allowed_agents" in data
|
||||
|
||||
|
||||
class TestHandleInsight:
|
||||
@pytest.mark.asyncio
|
||||
async def test_rejects_non_allowed_agent(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
insight = Insight(agent="unauthorized_agent", topic="test", content="bad")
|
||||
await cl._handle_insight(insight)
|
||||
assert len(cl._insights_by_topic) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_accepts_allowed_agent(self):
|
||||
sm = mock.AsyncMock(spec=SharedMemory)
|
||||
cl = CollectiveLearning(sm)
|
||||
agent = list(ALLOWED_AGENTS)[0]
|
||||
insight = Insight(agent=agent, topic="test", content="good")
|
||||
await cl._handle_insight(insight)
|
||||
assert len(cl._insights_by_topic["test"]) == 1
|
||||
111
tests/test_entity_manager.py
Normal file
111
tests/test_entity_manager.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""Tests for entity_manager module."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
# Add parent to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
import cortex.entity_manager as em
|
||||
|
||||
|
||||
class TestNormalize:
|
||||
def test_basic(self):
|
||||
assert em.normalize("Hello World") == "hello world"
|
||||
|
||||
def test_underscores(self):
|
||||
assert em.normalize("my_entity") == "my-entity"
|
||||
|
||||
def test_whitespace(self):
|
||||
assert em.normalize(" test ") == "test"
|
||||
|
||||
|
||||
class TestLoadJson:
|
||||
def test_missing_file(self):
|
||||
assert em.load_json(Path("/nonexistent/file.json")) == {}
|
||||
|
||||
def test_valid_json(self, tmp_path):
|
||||
f = tmp_path / "test.json"
|
||||
f.write_text('{"key": "value"}')
|
||||
assert em.load_json(f) == {"key": "value"}
|
||||
|
||||
def test_invalid_json(self, tmp_path):
|
||||
f = tmp_path / "bad.json"
|
||||
f.write_text("not json")
|
||||
assert em.load_json(f) == {}
|
||||
|
||||
|
||||
class TestSaveJson:
|
||||
def test_creates_dirs(self, tmp_path):
|
||||
f = tmp_path / "sub" / "dir" / "test.json"
|
||||
em.save_json(f, {"hello": "world"})
|
||||
assert json.loads(f.read_text()) == {"hello": "world"}
|
||||
|
||||
|
||||
class TestExtractEntities:
|
||||
def test_known_entity(self):
|
||||
known = {"acme-corp": {"type": "company"}}
|
||||
result = em.extract_entities("Working with Acme Corp today", known)
|
||||
assert "acme-corp" in result
|
||||
|
||||
def test_mention(self):
|
||||
result = em.extract_entities("Talked to @johndoe about it", {})
|
||||
assert "johndoe" in result
|
||||
assert result["johndoe"]["type"] == "person"
|
||||
|
||||
def test_capitalized_multi_word(self):
|
||||
result = em.extract_entities("Met with John Smith yesterday", {})
|
||||
assert "john smith" in result
|
||||
|
||||
def test_acronym(self):
|
||||
result = em.extract_entities("The ACME project is going well", {})
|
||||
assert "acme" in result
|
||||
assert result["acme"]["type"] == "organization"
|
||||
|
||||
def test_stop_words_filtered(self):
|
||||
result = em.extract_entities("The system is working fine", {})
|
||||
# None of these should be extracted as entities
|
||||
for word in ["the", "system", "working"]:
|
||||
assert word not in result
|
||||
|
||||
def test_empty_text(self):
|
||||
result = em.extract_entities("", {})
|
||||
assert result == {}
|
||||
|
||||
def test_short_mention_filtered(self):
|
||||
"""Mentions shorter than 3 chars should be filtered."""
|
||||
result = em.extract_entities("@ab said hi", {})
|
||||
assert "ab" not in result
|
||||
|
||||
|
||||
class TestCmdBootstrap:
|
||||
def test_bootstrap_with_empty_areas(self, tmp_path):
|
||||
with mock.patch.object(em, "LIFE_AREAS", tmp_path):
|
||||
with mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
|
||||
with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"):
|
||||
em.cmd_bootstrap()
|
||||
assert (tmp_path / "entities.json").exists()
|
||||
|
||||
|
||||
class TestCmdRelate:
    """Tests for the relate command: creation and incremental update."""

    def test_create_relationship(self, tmp_path):
        """A first relate call writes exactly one relationship with its type."""
        with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"), \
             mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
            em.cmd_relate("Alice", "Bob", "colleague")
            stored = json.loads((tmp_path / "rels.json").read_text())
            assert len(stored) == 1
            first_key = next(iter(stored))
            assert "colleague" in stored[first_key]["types"]

    def test_update_relationship(self, tmp_path):
        """Relating the same pair again bumps the count instead of duplicating."""
        with mock.patch.object(em, "RELATIONSHIPS_FILE", tmp_path / "rels.json"), \
             mock.patch.object(em, "ENTITIES_FILE", tmp_path / "entities.json"):
            em.cmd_relate("Alice", "Bob", "colleague")
            em.cmd_relate("Alice", "Bob", "friend")
            stored = json.loads((tmp_path / "rels.json").read_text())
            first_key = next(iter(stored))
            assert stored[first_key]["count"] == 2
|
||||
79
tests/test_governance_enforcer.py
Normal file
79
tests/test_governance_enforcer.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""Tests for governance/enforcer.py — Runtime Enforcer."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
|
||||
|
||||
from governance.enforcer import Enforcer, Decision
|
||||
from governance.policy import PolicyEngine
|
||||
from governance.risk_scorer import RiskScorer
|
||||
from governance.evidence import EvidenceCollector, ControlMapping
|
||||
|
||||
|
||||
def _make_enforcer(tmp_path, rules=None):
    """Build an Enforcer wired to a temp policy dir; optionally seed rules."""
    if rules:
        # Seed a single policy file so the engine picks the rules up on load.
        (tmp_path / "test.yaml").write_text(yaml.dump({
            "name": "test", "description": "", "version": "1",
            "rules": rules,
        }))
    return Enforcer(
        policy_engine=PolicyEngine(policies_dir=str(tmp_path)),
        risk_scorer=RiskScorer(),
        evidence_collector=EvidenceCollector(control_mapping=ControlMapping("/dev/null")),
    )
|
||||
|
||||
|
||||
class TestDecision:
    """The Decision dataclass maps its verdict onto the `approved` flag."""

    def test_approved(self):
        """An 'approve' verdict reports approved == True."""
        from governance.risk_scorer import RiskResult
        decision = Decision(verdict="approve", reason="ok", risk=RiskResult(0, "low", {}), policy_result={})
        assert decision.approved

    def test_not_approved(self):
        """A 'deny' verdict reports approved == False."""
        from governance.risk_scorer import RiskResult
        decision = Decision(verdict="deny", reason="no", risk=RiskResult(9, "critical", {}), policy_result={})
        assert not decision.approved
|
||||
|
||||
|
||||
class TestEnforcer:
    """End-to-end checks of Enforcer.evaluate: policy, risk, and evidence."""

    def test_default_allow(self, tmp_path):
        """With no policies loaded, a benign action is allowed."""
        verdict = _make_enforcer(tmp_path).evaluate(
            {"agent": "claudia", "action": "read", "hour": 12}
        ).verdict
        assert verdict == "allow"

    def test_policy_deny(self, tmp_path):
        """A matching deny rule produces a deny verdict."""
        enforcer = _make_enforcer(tmp_path, rules=[
            {"name": "deny-ext", "conditions": {"target": "external"}, "effect": "deny", "priority": 10},
        ])
        ctx = {"agent": "claudia", "action": "send", "target": "external", "hour": 12}
        assert enforcer.evaluate(ctx).verdict == "deny"

    def test_risk_override(self, tmp_path):
        """High risk should override an allow policy to escalate."""
        enforcer = _make_enforcer(tmp_path, rules=[
            {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1},
        ])
        ctx = {
            "agent": "claudia", "action": "export",
            "data_type": "restricted", "target": "external", "hour": 12,
        }
        # Risk should be high/critical, overriding the allow
        assert enforcer.evaluate(ctx).verdict in ("deny", "escalate")

    def test_evidence_recorded(self, tmp_path):
        """Every evaluation appends one evidence record to the collector."""
        enforcer = _make_enforcer(tmp_path)
        enforcer.evaluate({"agent": "test", "action": "read", "hour": 12})
        assert len(enforcer.evidence_collector.evidence) == 1

    def test_data_classification_alias(self, tmp_path):
        """The data_classification key is accepted as an alias for data_type."""
        decision = _make_enforcer(tmp_path).evaluate({
            "agent": "test", "action": "read",
            "data_classification": "confidential", "hour": 12,
        })
        # Should use data_classification as data_type
        assert decision.risk.factors["data_type"]["value"] == "confidential"
|
||||
86
tests/test_governance_evidence.py
Normal file
86
tests/test_governance_evidence.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
"""Tests for governance/evidence.py — Evidence Collector & Control Mapping."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
|
||||
|
||||
from governance.evidence import EvidenceCollector, EvidenceRecord, ControlMapping
|
||||
|
||||
|
||||
class TestControlMapping:
    """ControlMapping resolves event types to compliance-control IDs."""

    def test_missing_file(self):
        """A nonexistent mapping file degrades to no controls for any event."""
        mapping = ControlMapping(mapping_path="/nonexistent/path.yaml")
        assert mapping.get_controls("anything") == []

    def test_load_mapping(self, tmp_path):
        """Each event type resolves to its mapped controls; unknowns to []."""
        mapping_file = tmp_path / "mapping.yaml"
        mapping_file.write_text(yaml.dump({
            "mappings": [
                {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]},
                {"event_types": ["access_request", "data_export"], "controls": ["A.9.1"]},
            ]
        }))
        mapping = ControlMapping(mapping_path=str(mapping_file))
        assert "A.5.1" in mapping.get_controls("policy_evaluation")
        assert "A.9.1" in mapping.get_controls("access_request")
        assert mapping.get_controls("unknown_event") == []
|
||||
|
||||
|
||||
class TestEvidenceRecord:
    """EvidenceRecord serializes all of its fields via to_dict()."""

    def test_to_dict(self):
        """to_dict round-trips agent and controls fields."""
        record = EvidenceRecord(
            timestamp="2026-01-01T00:00:00Z",
            event_type="test",
            agent="claudia",
            action="read",
            verdict="allow",
            risk_score=2,
            risk_level="low",
            controls=["A.5.1"],
        )
        as_dict = record.to_dict()
        assert as_dict["agent"] == "claudia"
        assert as_dict["controls"] == ["A.5.1"]
|
||||
|
||||
|
||||
class TestEvidenceCollector:
    """EvidenceCollector records, filters, and exports evidence entries."""

    def setup_method(self):
        # Fresh collector per test; /dev/null yields an empty control mapping.
        self.collector = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))

    def test_record(self):
        """record() returns the new entry and appends it to .evidence."""
        entry = self.collector.record(
            event_type="policy_evaluation",
            agent="claudia",
            action="send_email",
            verdict="allow",
            risk_score=3,
            risk_level="low",
        )
        assert entry.agent == "claudia"
        assert len(self.collector.evidence) == 1

    def test_filter_by_agent(self):
        """get_evidence(agent=...) returns only that agent's entries."""
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        self.collector.record(event_type="e", agent="b", action="x", verdict="deny")
        assert len(self.collector.get_evidence(agent="a")) == 1

    def test_filter_by_verdict(self):
        """get_evidence(verdict=...) returns only matching verdicts."""
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        self.collector.record(event_type="e", agent="a", action="y", verdict="deny")
        assert len(self.collector.get_evidence(verdict="deny")) == 1

    def test_export_json(self):
        """export_json() produces parseable JSON of all recorded entries."""
        self.collector.record(event_type="e", agent="a", action="x", verdict="allow")
        parsed = json.loads(self.collector.export_json())
        assert len(parsed) == 1
        assert parsed[0]["agent"] == "a"

    def test_empty_evidence(self):
        """An untouched collector yields empty results and an empty export."""
        assert self.collector.get_evidence() == []
        assert json.loads(self.collector.export_json()) == []
|
||||
126
tests/test_governance_policy.py
Normal file
126
tests/test_governance_policy.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""Tests for governance/policy.py — Policy Engine.
|
||||
|
||||
NOTE: This module exists only in darkplex-core. Tests written against the module API.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
|
||||
# We need yaml for creating test fixtures
|
||||
import yaml
|
||||
|
||||
|
||||
def _write_policy(tmpdir, filename, data):
    """Dump *data* as YAML into tmpdir/filename and return the path."""
    target = Path(tmpdir) / filename
    target.write_text(yaml.dump(data))
    return target
|
||||
|
||||
|
||||
class TestRule:
    """Rule.matches() semantics: exact, list, multi-key, and missing-key."""

    def setup_method(self):
        # Import lazily so the darkplex-core checkout is on sys.path first.
        import sys
        sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
        from governance.policy import Rule
        self.Rule = Rule

    def test_matches_simple(self):
        """A single equal key/value matches."""
        rule = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert rule.matches({"agent": "claudia"})

    def test_no_match(self):
        """A differing value does not match."""
        rule = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert not rule.matches({"agent": "other"})

    def test_missing_key(self):
        """A context missing the condition key does not match."""
        rule = self.Rule(name="r1", conditions={"agent": "claudia"}, effect="allow")
        assert not rule.matches({})

    def test_list_condition(self):
        """A list condition matches membership, not equality."""
        rule = self.Rule(name="r1", conditions={"action": ["read", "write"]}, effect="allow")
        assert rule.matches({"action": "read"})
        assert not rule.matches({"action": "delete"})

    def test_multiple_conditions(self):
        """All condition keys must match simultaneously."""
        rule = self.Rule(name="r1", conditions={"agent": "claudia", "action": "send"}, effect="deny")
        assert rule.matches({"agent": "claudia", "action": "send"})
        assert not rule.matches({"agent": "claudia", "action": "read"})
|
||||
|
||||
|
||||
class TestPolicyEngine:
    """Loading, evaluation, priority ordering, reload, and schema skipping."""

    def setup_method(self):
        # Import lazily so the darkplex-core checkout is on sys.path first.
        import sys
        sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
        from governance.policy import PolicyEngine
        self.PolicyEngine = PolicyEngine

    def test_empty_dir(self, tmp_path):
        """An empty policies dir loads zero policies."""
        assert self.PolicyEngine(policies_dir=str(tmp_path)).policies == []

    def test_nonexistent_dir(self, tmp_path):
        """A missing policies dir is tolerated and loads zero policies."""
        assert self.PolicyEngine(policies_dir=str(tmp_path / "nope")).policies == []

    def test_load_policy(self, tmp_path):
        """A valid YAML file becomes one policy carrying its rules."""
        _write_policy(tmp_path, "test.yaml", {
            "name": "test-policy",
            "description": "Test",
            "version": "1.0.0",
            "rules": [
                {"name": "deny-external", "conditions": {"target": "external"}, "effect": "deny", "priority": 10},
            ],
        })
        loaded = self.PolicyEngine(policies_dir=str(tmp_path)).policies
        assert len(loaded) == 1
        assert loaded[0].name == "test-policy"
        assert len(loaded[0].rules) == 1

    def test_evaluate_no_match(self, tmp_path):
        """When no rule matches, the default verdict is allow."""
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [{"name": "r1", "conditions": {"agent": "x"}, "effect": "deny"}],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert engine.evaluate({"agent": "y"})["verdict"] == "allow"

    def test_evaluate_match_deny(self, tmp_path):
        """A matching deny rule produces a deny verdict."""
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [{"name": "r1", "conditions": {"target": "external"}, "effect": "deny", "priority": 5}],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert engine.evaluate({"target": "external"})["verdict"] == "deny"

    def test_priority_ordering(self, tmp_path):
        """When both rules match, the higher-priority rule's effect wins."""
        _write_policy(tmp_path, "test.yaml", {
            "name": "p", "description": "", "version": "1",
            "rules": [
                {"name": "allow-all", "conditions": {"agent": "claudia"}, "effect": "allow", "priority": 1},
                {"name": "deny-ext", "conditions": {"agent": "claudia"}, "effect": "deny", "priority": 10},
            ],
        })
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert engine.evaluate({"agent": "claudia"})["verdict"] == "deny"  # higher priority wins

    def test_reload(self, tmp_path):
        """reload() picks up policy files written after construction."""
        engine = self.PolicyEngine(policies_dir=str(tmp_path))
        assert len(engine.policies) == 0
        _write_policy(tmp_path, "new.yaml", {
            "name": "new", "description": "", "version": "1", "rules": [],
        })
        engine.reload()
        assert len(engine.policies) == 1

    def test_skips_schema_yaml(self, tmp_path):
        """schema.yaml is ignored; only real policy files are loaded."""
        _write_policy(tmp_path, "schema.yaml", {"name": "schema"})
        _write_policy(tmp_path, "real.yaml", {
            "name": "real", "description": "", "version": "1", "rules": [],
        })
        loaded = self.PolicyEngine(policies_dir=str(tmp_path)).policies
        assert len(loaded) == 1
        assert loaded[0].name == "real"
|
||||
57
tests/test_governance_report.py
Normal file
57
tests/test_governance_report.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
"""Tests for governance/report_generator.py."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
|
||||
|
||||
from governance.evidence import EvidenceCollector, ControlMapping
|
||||
from governance.report_generator import ReportGenerator
|
||||
|
||||
|
||||
class TestReportGenerator:
    """Compliance and per-agent report generation from collected evidence."""

    def _collector_with_mapping(self, tmp_path):
        """Helper: a collector whose mapping routes policy_evaluation events."""
        import yaml
        mapping_file = tmp_path / "mapping.yaml"
        mapping_file.write_text(yaml.dump({
            "mappings": [
                {"event_types": ["policy_evaluation"], "controls": ["A.5.1", "A.8.1"]},
            ]
        }))
        return EvidenceCollector(control_mapping=ControlMapping(str(mapping_file)))

    def test_empty_report(self):
        """With no evidence the report states status == 'no_evidence'."""
        empty = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))
        report = ReportGenerator(empty).generate_compliance_report()
        assert report["status"] == "no_evidence"

    def test_report_with_evidence(self, tmp_path):
        """Totals, control coverage, and deny/high-risk summaries are derived."""
        collector = self._collector_with_mapping(tmp_path)
        collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow", risk_score=2, risk_level="low")
        collector.record(event_type="policy_evaluation", agent="claudia", action="write", verdict="deny", risk_score=8, risk_level="high")

        report = ReportGenerator(collector).generate_compliance_report()
        assert report["total_evidence"] == 2
        assert "A.5.1" in report["controls_covered"]
        assert report["summary"]["total_deny"] == 1
        assert report["summary"]["high_risk_events"] == 1

    def test_agent_report(self, tmp_path):
        """The agent report counts only that agent's actions."""
        collector = self._collector_with_mapping(tmp_path)
        collector.record(event_type="policy_evaluation", agent="claudia", action="read", verdict="allow")
        collector.record(event_type="policy_evaluation", agent="other", action="read", verdict="deny")

        report = ReportGenerator(collector).generate_agent_report("claudia")
        assert report["agent"] == "claudia"
        assert report["total_actions"] == 1

    def test_export_json(self):
        """export_json emits parseable JSON even for an empty report."""
        empty = EvidenceCollector(control_mapping=ControlMapping("/dev/null"))
        parsed = json.loads(ReportGenerator(empty).export_json())
        assert "status" in parsed  # empty report
|
||||
80
tests/test_governance_risk_scorer.py
Normal file
80
tests/test_governance_risk_scorer.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""Tests for governance/risk_scorer.py."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
|
||||
|
||||
from governance.risk_scorer import RiskScorer, RiskResult, _classify_level
|
||||
|
||||
|
||||
class TestClassifyLevel:
    """_classify_level bands: 0-3 low, 4-6 elevated, 7-8 high, 9-10 critical."""

    def test_low(self):
        """Both band edges of 'low'."""
        assert _classify_level(0) == "low"
        assert _classify_level(3) == "low"

    def test_elevated(self):
        """Both band edges of 'elevated'."""
        assert _classify_level(4) == "elevated"
        assert _classify_level(6) == "elevated"

    def test_high(self):
        """Both band edges of 'high'."""
        assert _classify_level(7) == "high"
        assert _classify_level(8) == "high"

    def test_critical(self):
        """Both band edges of 'critical'."""
        assert _classify_level(9) == "critical"
        assert _classify_level(10) == "critical"
|
||||
|
||||
|
||||
class TestRiskResult:
    """RiskResult.is_acceptable reflects whether the score is tolerable."""

    def test_acceptable(self):
        """A low score is acceptable."""
        assert RiskResult(value=3, level="low", factors={}).is_acceptable

    def test_not_acceptable(self):
        """A high score is not acceptable."""
        assert not RiskResult(value=7, level="high", factors={}).is_acceptable
|
||||
|
||||
|
||||
class TestRiskScorer:
    """RiskScorer.score combines data type, target, role, and time of day."""

    def setup_method(self):
        self.scorer = RiskScorer()

    def test_default_low_risk(self):
        """With no risk factors present, scoring defaults to acceptable/low."""
        outcome = self.scorer.score({"hour": 12})
        assert outcome.level == "low"
        assert outcome.is_acceptable

    def test_public_internal(self):
        """Public data sent internally stays in the low band."""
        outcome = self.scorer.score({"data_type": "public", "target": "internal", "hour": 12})
        assert outcome.value <= 3

    def test_confidential_external(self):
        """Confidential data to an external target scores high."""
        outcome = self.scorer.score({"data_type": "confidential", "target": "external", "hour": 12})
        assert outcome.value >= 7

    def test_restricted_critical(self):
        """Restricted data to an external target is high or critical."""
        outcome = self.scorer.score({"data_type": "restricted", "target": "external", "hour": 12})
        assert outcome.level in ("high", "critical")

    def test_off_hours_bonus(self):
        """The same action scores higher at night than during the day."""
        daytime = self.scorer.score({"data_type": "internal", "hour": 12})
        nighttime = self.scorer.score({"data_type": "internal", "hour": 2})
        assert nighttime.value > daytime.value

    def test_admin_role_reduces_risk(self):
        """An admin role scores lower than an external role."""
        as_admin = self.scorer.score({"agent_role": "admin", "hour": 12})
        as_external = self.scorer.score({"agent_role": "external", "hour": 12})
        assert as_admin.value < as_external.value

    def test_factors_populated(self):
        """Every contributing factor is reported in the result."""
        outcome = self.scorer.score({"data_type": "internal", "target": "external", "hour": 10})
        for factor in ("data_type", "target", "agent_role", "time_of_day"):
            assert factor in outcome.factors

    def test_clamped_0_10(self):
        """Even with extreme inputs the score stays within [0, 10]."""
        outcome = self.scorer.score({"data_type": "restricted", "target": "external", "agent_role": "external", "hour": 3})
        assert 0 <= outcome.value <= 10
|
||||
136
tests/test_knowledge_cleanup.py
Normal file
136
tests/test_knowledge_cleanup.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
"""Tests for intelligence/knowledge_cleanup.py — Knowledge Graph Cleanup."""
|
||||
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
|
||||
|
||||
import knowledge_cleanup as kc
|
||||
|
||||
|
||||
class TestBackup:
    """backup() snapshots a file before destructive cleanup passes."""

    def test_creates_backup(self, tmp_path):
        """The backup file exists and is named with a backup_ prefix."""
        source = tmp_path / "test.json"
        source.write_text('{"a": 1}')
        copy_path = kc.backup(source)
        assert copy_path.exists()
        assert "backup_" in copy_path.name
|
||||
|
||||
class TestAtomicWrite:
    """atomic_write() persists JSON without leaving partial files behind."""

    def test_writes_atomically(self, tmp_path):
        """The written file round-trips the given payload as JSON."""
        destination = tmp_path / "out.json"
        kc.atomic_write(destination, {"key": "value"})
        assert json.loads(destination.read_text()) == {"key": "value"}
|
||||
|
||||
|
||||
class TestFindDuplicates:
    """find_duplicates() groups case and substring variants of one entity."""

    def test_no_duplicates(self):
        """Distinct entities yield no duplicate groups."""
        groups = kc.find_duplicates({"albert": {}, "mondo-gate": {}})
        assert len(groups) == 0

    def test_case_duplicates(self):
        """Case-only variants are grouped together."""
        groups = kc.find_duplicates({"Albert": {}, "albert": {}, "ALBERT": {}})
        assert len(groups) >= 1

    def test_substring_duplicates(self):
        """A name contained in another same-typed name is flagged."""
        groups = kc.find_duplicates({"mondo": {"type": "company"}, "mondo gate": {"type": "company"}})
        assert len(groups) >= 1
|
||||
|
||||
|
||||
class TestPickCanonical:
    """pick_canonical() chooses the best-documented / capitalized variant."""

    def test_prefers_uppercase(self):
        """Between case variants, the capitalized (richer) entry wins."""
        variants = ["albert", "Albert"]
        catalog = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}}
        assert kc.pick_canonical(variants, catalog) == "Albert"

    def test_prefers_more_fields(self):
        """The entry carrying more metadata fields wins."""
        variants = ["a", "A"]
        catalog = {"a": {"type": "person"}, "A": {"type": "person", "source": "x", "extra": "y"}}
        assert kc.pick_canonical(variants, catalog) == "A"
|
||||
|
||||
|
||||
class TestDeduplicate:
    """deduplicate() merges duplicate entities and remaps relationships."""

    def test_merges_entities(self):
        """Case duplicates collapse into a single canonical entity."""
        catalog = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}}
        merged, _ = kc.deduplicate(catalog, {}, dry_run=False)
        assert len(merged) == 1

    def test_dry_run_no_change(self):
        """dry_run leaves the entity set untouched."""
        catalog = {"albert": {"type": "person"}, "Albert": {"type": "person"}}
        merged, _ = kc.deduplicate(catalog, {}, dry_run=True)
        assert len(merged) == 2  # unchanged in dry run

    def test_updates_relationships(self):
        """Relationships referencing merged names survive the merge."""
        catalog = {"albert": {"type": "person"}, "Albert": {"type": "person"}}
        links = {
            "albert::mondo": {"a": "albert", "b": "mondo", "types": ["co-occurrence"], "count": 1, "first_seen": "2026-01-01", "last_seen": "2026-01-01"},
        }
        _, remapped = kc.deduplicate(catalog, links, dry_run=False)
        # Relationship should be remapped to canonical
        assert len(remapped) == 1
|
||||
|
||||
|
||||
class TestScoreRelationships:
    """score_relationships() assigns decay-weighted strengths and prunes."""

    def test_scores_assigned(self):
        """A fresh, frequently-seen relationship gets a strength in (0, 1]."""
        links = {
            "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat(), "first_seen": "2026-01-01"},
        }
        scored = kc.score_relationships(links, dry_run=False)
        assert "strength" in scored["a::b"]
        assert 0 < scored["a::b"]["strength"] <= 1

    def test_removes_weak(self):
        """A stale low-count relationship ends up weak (or pruned entirely)."""
        old_date = (datetime.now() - timedelta(days=300)).isoformat()
        links = {
            "a::b": {"count": 1, "types": ["co-occurrence"], "last_seen": old_date, "first_seen": old_date},
        }
        scored = kc.score_relationships(links, dry_run=False)
        # Very old + low count should have low strength
        if len(scored) > 0:
            assert scored["a::b"]["strength"] < 0.3

    def test_dry_run(self):
        """dry_run computes nothing destructive: no strength is written."""
        links = {
            "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat()},
        }
        scored = kc.score_relationships(links, dry_run=True)
        assert "strength" not in scored["a::b"]
|
||||
|
||||
|
||||
class TestClassifyUnknowns:
    """classify_unknowns() re-types 'unknown' entities via a mocked LLM."""

    @patch("knowledge_cleanup.ollama_generate")
    def test_no_unknowns(self, mock_ollama):
        """With nothing unknown, the LLM is never invoked."""
        catalog = {"albert": {"type": "person"}}
        classified = kc.classify_unknowns(catalog, dry_run=False)
        mock_ollama.assert_not_called()
        assert classified == catalog

    @patch("knowledge_cleanup.ollama_generate")
    def test_classifies_unknowns(self, mock_ollama):
        """The LLM's answer replaces the 'unknown' type."""
        mock_ollama.return_value = '{"1": "person"}'
        classified = kc.classify_unknowns({"albert": {"type": "unknown"}}, dry_run=False)
        assert classified["albert"]["type"] == "person"

    @patch("knowledge_cleanup.ollama_generate")
    def test_dry_run_no_change(self, mock_ollama):
        """dry_run never rewrites types even when the LLM answers."""
        mock_ollama.return_value = '{"1": "person"}'
        classified = kc.classify_unknowns({"albert": {"type": "unknown"}}, dry_run=True)
        assert classified["albert"]["type"] == "unknown"

    @patch("knowledge_cleanup.ollama_generate")
    def test_handles_llm_failure(self, mock_ollama):
        """An LLM exception is swallowed and leaves entities untouched."""
        mock_ollama.side_effect = Exception("timeout")
        classified = kc.classify_unknowns({"albert": {"type": "unknown"}}, dry_run=False)
        assert classified["albert"]["type"] == "unknown"  # unchanged
|
||||
61
tests/test_knowledge_extractor.py
Normal file
61
tests/test_knowledge_extractor.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
"""Tests for knowledge_extractor.py (darkplex-core root) — Smart Extractor."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core"))
|
||||
|
||||
from knowledge_extractor import importance_heuristic, parse_since
|
||||
|
||||
|
||||
class TestImportanceHeuristic:
    """importance_heuristic scores text 0..1; business terms up, noise down."""

    def test_empty(self):
        """Empty or None text scores exactly zero."""
        assert importance_heuristic("") == 0.0
        assert importance_heuristic(None) == 0.0

    def test_short_text(self):
        """Any non-empty text scores in (0, 1]."""
        assert 0 < importance_heuristic("Hello world") <= 1.0

    def test_long_text_boosted(self):
        """Longer text scores higher than a trivial snippet."""
        brief_score = importance_heuristic("Hello")
        lengthy_score = importance_heuristic("x " * 300)
        assert lengthy_score > brief_score

    def test_heartbeat_penalized(self):
        """Heartbeat/status chatter is pushed well below the midpoint."""
        score = importance_heuristic("HEARTBEAT_OK system running fine no issues detected at all")
        assert score < 0.3

    def test_business_boosted(self):
        """Business vocabulary lifts the score above the midpoint."""
        score = importance_heuristic("Meeting about the project deadline and contract with the client partnership")
        assert score > 0.4

    def test_capitalized_names_boost(self):
        """Many proper names raise the score."""
        sample = "Albert discussed with Thomas, Sarah, Michael, Peter, Franz, and Maria about the Company"
        assert importance_heuristic(sample) > 0.4

    def test_clamped(self):
        """Even extreme texts stay within [0, 1]."""
        score = importance_heuristic("cron: heartbeat HEARTBEAT_OK health check no critical")
        assert 0 <= score <= 1.0
|
||||
|
||||
|
||||
class TestParseSince:
    """parse_since turns 'Nh'/'Nd'/'Nm' strings into a timestamp or None."""

    def test_hours(self):
        """An hours suffix parses into a positive timestamp."""
        stamp = parse_since("6h")
        assert stamp is not None
        assert stamp > 0

    def test_days(self):
        """A days suffix parses."""
        assert parse_since("1d") is not None

    def test_minutes(self):
        """A minutes suffix parses."""
        assert parse_since("30m") is not None

    def test_invalid(self):
        """Garbage or empty input yields None."""
        assert parse_since("abc") is None
        assert parse_since("") is None
|
||||
147
tests/test_llm_extractor.py
Normal file
147
tests/test_llm_extractor.py
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
"""Tests for intelligence/llm_extractor.py — LLM-Powered Entity Extractor."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
|
||||
|
||||
from llm_extractor import (
|
||||
_parse_json_response,
|
||||
_normalize_entities,
|
||||
extract_entities_llm,
|
||||
extract_entities_llm_batch,
|
||||
is_available,
|
||||
VALID_TYPES,
|
||||
)
|
||||
|
||||
|
||||
class TestParseJsonResponse:
    """_parse_json_response tolerates fences, noise, and empty input."""

    def test_empty(self):
        """Empty or None input yields an empty dict."""
        assert _parse_json_response("") == {}
        assert _parse_json_response(None) == {}

    def test_plain_json(self):
        """A bare JSON object parses directly."""
        parsed = _parse_json_response('{"albert": {"type": "person", "context": "CEO"}}')
        assert "albert" in parsed

    def test_markdown_fenced(self):
        """JSON wrapped in a ```json fence is unwrapped and parsed."""
        parsed = _parse_json_response('```json\n{"albert": {"type": "person", "context": "CEO"}}\n```')
        assert "albert" in parsed

    def test_no_json(self):
        """Prose with no JSON object yields an empty dict."""
        assert _parse_json_response("no json here") == {}

    def test_nested_braces(self):
        """Nested objects do not confuse the brace matching."""
        parsed = _parse_json_response('{"a": {"type": "person", "context": "test"}}')
        assert "a" in parsed
|
||||
|
||||
|
||||
class TestNormalizeEntities:
    """_normalize_entities lowercases, validates types, and filters names."""

    def test_valid_entity(self):
        """A well-formed entity is lowercased and tagged as an LLM match."""
        normalized = _normalize_entities({"Albert": {"type": "person", "context": "CEO of company"}})
        assert "albert" in normalized
        assert normalized["albert"]["type"] == "person"
        assert normalized["albert"]["match"] == "llm"

    def test_type_alias(self):
        """Known type aliases are folded into the canonical type."""
        normalized = _normalize_entities({"python": {"type": "language", "context": "programming"}})
        assert normalized["python"]["type"] == "technology"

    def test_unknown_type_becomes_concept(self):
        """An unrecognized type falls back to 'concept'."""
        normalized = _normalize_entities({"thing": {"type": "xyzzy", "context": "unknown"}})
        assert normalized["thing"]["type"] == "concept"

    def test_filters_short_names(self):
        """Single-character names are dropped."""
        assert len(_normalize_entities({"x": {"type": "person", "context": "test"}})) == 0

    def test_filters_long_names(self):
        """Names beyond the length cap are dropped."""
        assert len(_normalize_entities({"a" * 81: {"type": "person", "context": "test"}})) == 0

    def test_non_dict_info_skipped(self):
        """Entries whose value is not a dict are ignored."""
        assert len(_normalize_entities({"test": "not a dict"})) == 0

    def test_context_truncated(self):
        """Context strings are capped at 100 characters."""
        normalized = _normalize_entities({"test": {"type": "person", "context": "x" * 200}})
        assert len(normalized["test"]["context"]) <= 100

    def test_underscores_to_hyphens(self):
        """Underscored names are normalized to hyphens."""
        normalized = _normalize_entities({"mondo_gate": {"type": "company", "context": "test"}})
        assert "mondo-gate" in normalized
|
||||
|
||||
|
||||
class TestExtractEntitiesLlm:
    """extract_entities_llm: short-circuit, fallback, success, truncation."""

    @patch("llm_extractor._call_ollama")
    def test_empty_text(self, mock_ollama):
        """Empty or too-short text returns {} without calling the LLM."""
        assert extract_entities_llm("") == {}
        assert extract_entities_llm("short") == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_ollama_unavailable(self, mock_ollama):
        """A None LLM response propagates as None to trigger the fallback."""
        mock_ollama.return_value = None
        outcome = extract_entities_llm("This is a test about Albert and Mondo Gate AG")
        assert outcome is None  # signals fallback

    @patch("llm_extractor._call_ollama")
    def test_successful_extraction(self, mock_ollama):
        """A valid JSON response is normalized into typed entities."""
        mock_ollama.return_value = '{"albert": {"type": "person", "context": "mentioned"}}'
        outcome = extract_entities_llm("Albert discussed the project with the team members today")
        assert "albert" in outcome
        assert outcome["albert"]["type"] == "person"

    @patch("llm_extractor._call_ollama")
    def test_truncates_long_text(self, mock_ollama):
        """Oversized input is truncated before being embedded in the prompt."""
        mock_ollama.return_value = "{}"
        extract_entities_llm("x" * 3000)
        prompt_sent = mock_ollama.call_args[0][0]
        # The text in the prompt should be truncated
        assert len(prompt_sent) < 3000 + 500  # prompt overhead
|
||||
|
||||
|
||||
class TestExtractEntitiesLlmBatch:
    """extract_entities_llm_batch filters inputs and merges batch results."""

    @patch("llm_extractor._call_ollama")
    def test_empty_list(self, mock_ollama):
        """An empty batch yields {} without calling the LLM."""
        assert extract_entities_llm_batch([]) == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_filters_short_texts(self, mock_ollama):
        """A batch of only trivial texts yields {} without an LLM call."""
        mock_ollama.return_value = "{}"
        assert extract_entities_llm_batch(["hi", "yo", ""]) == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_batch_extraction(self, mock_ollama):
        """A substantive batch item produces normalized entities."""
        mock_ollama.return_value = '{"python": {"type": "technology", "context": "language"}}'
        outcome = extract_entities_llm_batch(["Python is a great programming language for data science"])
        assert "python" in outcome
|
||||
|
||||
|
||||
class TestIsAvailable:
    """is_available probes the Ollama HTTP endpoint and never raises."""

    @patch("llm_extractor.urllib.request.urlopen")
    def test_available(self, mock_urlopen):
        """An HTTP 200 from the endpoint reports availability."""
        fake_response = MagicMock()
        fake_response.status = 200
        # Make the mock usable as a context manager (with ... as resp:).
        fake_response.__enter__ = MagicMock(return_value=fake_response)
        fake_response.__exit__ = MagicMock(return_value=False)
        mock_urlopen.return_value = fake_response
        assert is_available() is True

    @patch("llm_extractor.urllib.request.urlopen")
    def test_unavailable(self, mock_urlopen):
        """Any connection error is reported as unavailable, not raised."""
        mock_urlopen.side_effect = Exception("connection refused")
        assert is_available() is False
|
||||
119
tests/test_loop.py
Normal file
119
tests/test_loop.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
"""Tests for intelligence/loop.py — Darkplex Loop state machine and helpers."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
|
||||
|
||||
import loop as darkplex_loop
|
||||
|
||||
|
||||
class TestImportance:
    """Heuristic importance scoring in loop._importance."""

    def test_empty(self):
        # No content means zero importance.
        assert darkplex_loop._importance("") == 0.0

    def test_heartbeat_low(self):
        # Routine heartbeat chatter should score near the floor.
        assert darkplex_loop._importance("HEARTBEAT_OK all systems nominal") < 0.2

    def test_business_content_high(self):
        # Business-relevant keywords push the score well above baseline.
        score = darkplex_loop._importance("Meeting about the project deadline and budget milestone")
        assert score > 0.4

    def test_clamped(self):
        # The score is always clamped to [0, 1] regardless of input.
        samples = ["", "x" * 1000, "meeting project company contract decision strategy"]
        for sample in samples:
            assert 0.0 <= darkplex_loop._importance(sample) <= 1.0
|
||||
|
||||
|
||||
class TestLoopState:
    """Persistence and state-machine behaviour of darkplex_loop.LoopState."""

    @staticmethod
    def _isolated(tmp_path):
        # Patch the loop's STATE_FILE into a per-test temp location so tests
        # never touch (or depend on) the real on-disk state.
        return patch.object(darkplex_loop, "STATE_FILE", tmp_path / "state.json")

    def test_init(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            assert st.status == "INIT"
            assert st.cycle_count == 0

    def test_save_and_load(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            st.status = "RUNNING"
            st.cycle_count = 5
            st.save()

            # A fresh instance must pick up the persisted state.
            reloaded = darkplex_loop.LoopState()
            assert reloaded.status == "RUNNING"
            assert reloaded.cycle_count == 5

    def test_record_success(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            st.record_success({"test": "ok"})
            assert st.status == "RUNNING"
            assert st.consecutive_failures == 0
            assert st.cycle_count == 1

    def test_record_failure_degraded(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            st.record_failure("ingest", "timeout")
            assert st.status == "DEGRADED"
            assert st.consecutive_failures == 1

    def test_record_failure_emergency(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            # Three consecutive failures trip the EMERGENCY threshold.
            for _ in range(3):
                st.record_failure("ingest", "timeout")
            assert st.status == "EMERGENCY"

    def test_can_alert(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            assert st.can_alert()
            st.mark_alerted()
            # Alerting is rate-limited immediately after an alert fires.
            assert not st.can_alert()

    def test_record_perf(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            st.record_perf({"total_ms": 1000, "ingest_ms": 200})
            assert st.perf["total_ms"] == 1000
            assert len(st.perf_history) == 1

    def test_perf_averages(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            st.record_perf({"total_ms": 1000})
            st.record_perf({"total_ms": 2000})
            assert st.perf_averages()["total_ms"] == 1500

    def test_perf_history_capped(self, tmp_path):
        with self._isolated(tmp_path):
            st = darkplex_loop.LoopState()
            # History is a rolling window of the 10 most recent samples.
            for i in range(15):
                st.record_perf({"total_ms": i * 100})
            assert len(st.perf_history) == 10
|
||||
|
||||
|
||||
class TestCheckNewEvents:
    """check_new_events() shells out (via subprocess) and parses the pending count."""

    @patch("loop.subprocess.run")
    def test_returns_pending(self, fake_run):
        # A clean exit with a JSON payload yields the reported pending count.
        fake_run.return_value = MagicMock(
            returncode=0,
            stdout=json.dumps({"num_pending": 42}),
        )
        assert darkplex_loop.check_new_events() == 42

    @patch("loop.subprocess.run")
    def test_returns_negative_on_failure(self, fake_run):
        # A non-zero exit code is reported as -1, not raised.
        fake_run.return_value = MagicMock(returncode=1, stdout="")
        assert darkplex_loop.check_new_events() == -1

    @patch("loop.subprocess.run")
    def test_handles_exception(self, fake_run):
        # Even a missing binary degrades to -1 instead of propagating.
        fake_run.side_effect = Exception("nats not found")
        assert darkplex_loop.check_new_events() == -1
|
||||
72
tests/test_shared_memory.py
Normal file
72
tests/test_shared_memory.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""Tests for intelligence/shared_memory module."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from cortex.intelligence.shared_memory import ALLOWED_AGENTS, Insight, SharedMemory
|
||||
|
||||
|
||||
class TestInsight:
    """Construction and JSON (de)serialisation of Insight."""

    def test_creation(self):
        insight = Insight(agent="claudia", topic="test", content="hello")
        assert insight.agent == "claudia"
        # Timestamp is auto-populated on construction.
        assert insight.timestamp

    def test_to_json(self):
        insight = Insight(agent="claudia", topic="test", content="hello")
        payload = json.loads(insight.to_json())
        assert payload["agent"] == "claudia"
        assert payload["topic"] == "test"

    def test_from_json(self):
        # Round-trip through JSON preserves fields, including confidence.
        original = Insight(agent="claudia", topic="test", content="hello", confidence=0.9)
        restored = Insight.from_json(original.to_json())
        assert restored.agent == original.agent
        assert restored.confidence == 0.9

    def test_default_confidence(self):
        assert Insight(agent="claudia", topic="t", content="c").confidence == 0.8

    def test_tags(self):
        tagged = Insight(agent="claudia", topic="t", content="c", tags=["a", "b"])
        assert len(tagged.tags) == 2
|
||||
|
||||
|
||||
class TestSharedMemory:
    """Agent allow-listing and the not-connected guards on SharedMemory.

    The async publish/subscribe guards are driven with ``asyncio.run()``:
    the previous ``asyncio.get_event_loop().run_until_complete()`` pattern
    is deprecated when no loop is running (Python 3.10+) and is unreliable
    on 3.12+, where it can raise or reuse a stale loop between tests.
    """

    def test_allowed_agent(self):
        # Any agent on the allow-list may construct a SharedMemory.
        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        assert sm.agent_name == agent

    def test_disallowed_agent(self):
        # Unknown agents are rejected at construction time.
        with pytest.raises(ValueError, match="not allowed"):
            SharedMemory(agent_name="hacker_bot")

    def test_not_connected_publish(self):
        import asyncio

        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        # publish() must refuse to run before connect() has been called.
        with pytest.raises(RuntimeError, match="Not connected"):
            asyncio.run(sm.publish(Insight(agent=agent, topic="t", content="c")))

    def test_not_connected_subscribe(self):
        import asyncio

        agent = list(ALLOWED_AGENTS)[0]
        sm = SharedMemory(agent_name=agent)
        # subscribe() shares the same not-connected guard.
        with pytest.raises(RuntimeError, match="Not connected"):
            asyncio.run(sm.subscribe("t", lambda x: None))
|
||||
|
||||
|
||||
class TestAllowedAgents:
    """Sanity checks on the ALLOWED_AGENTS allow-list."""

    def test_default_agents(self):
        # "claudia" ships in the default allow-list, which is never empty.
        assert "claudia" in ALLOWED_AGENTS
        assert len(ALLOWED_AGENTS) >= 1
|
||||
77
tests/test_temporal.py
Normal file
77
tests/test_temporal.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
"""Tests for intelligence/temporal.py — Temporal Context API."""
|
||||
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
|
||||
|
||||
from temporal import TemporalEntry, TemporalQuery, TemporalContext
|
||||
|
||||
|
||||
class TestTemporalEntry:
    """Construction defaults and optional metadata on TemporalEntry."""

    def test_creation(self):
        entry = TemporalEntry(
            timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc),
            source="nats",
            topic="ssl-cert",
            content="SSL cert expiring",
        )
        assert entry.source == "nats"
        # Relevance defaults to 0.0 until a query scores the entry.
        assert entry.relevance_score == 0.0

    def test_metadata(self):
        entry = TemporalEntry(
            timestamp=datetime.now(timezone.utc),
            source="chromadb",
            topic="test",
            content="test",
            metadata={"key": "value"},
            relevance_score=0.95,
        )
        assert entry.metadata["key"] == "value"
        assert entry.relevance_score == 0.95
|
||||
|
||||
|
||||
class TestTemporalQuery:
    """Default and custom query parameters on TemporalQuery."""

    def test_defaults(self):
        query = TemporalQuery(topic="test")
        assert query.limit == 50
        # Both backends are consulted unless explicitly narrowed.
        assert "nats" in query.sources
        assert "chromadb" in query.sources

    def test_custom(self):
        query = TemporalQuery(
            topic="ssl",
            start_time=datetime(2026, 1, 1, tzinfo=timezone.utc),
            limit=10,
            sources=["nats"],
        )
        assert query.limit == 10
        assert len(query.sources) == 1
|
||||
|
||||
|
||||
class TestTemporalContext:
    """Connection defaults and graceful no-op behaviour of TemporalContext."""

    def test_init_defaults(self):
        # Both backend URLs default to localhost endpoints.
        ctx = TemporalContext()
        assert "localhost" in ctx.nats_url
        assert "localhost" in ctx.chromadb_url

    def test_init_custom(self):
        ctx = TemporalContext(nats_url="nats://custom:4222", chromadb_url="http://custom:8000")
        assert ctx.nats_url == "nats://custom:4222"

    @pytest.mark.asyncio
    async def test_query_no_connections(self):
        # With no backend connections established, a query yields nothing.
        ctx = TemporalContext()
        assert await ctx.query(TemporalQuery(topic="test")) == []

    @pytest.mark.asyncio
    async def test_close_no_connection(self):
        # Closing an unconnected context must be a silent no-op.
        ctx = TemporalContext()
        await ctx.close()
|
||||
Loading…
Reference in a new issue