darkplex-core/cortex/loop.py
Claudia fd7d75c0ed
Some checks failed
Tests / test (push) Failing after 2s
Merge darkplex-core into cortex — unified intelligence layer v0.2.0
- Merged all unique darkplex-core modules into cortex:
  - intelligence/ subfolder (anticipator, collective, shared_memory, knowledge_cleanup, temporal, llm_extractor, loop)
  - governance/ subfolder (policy engine, risk scorer, evidence, enforcer, report generator)
  - entity_manager.py, knowledge_extractor.py
- Fixed bare 'from intelligence.' imports to 'from cortex.intelligence.'
- Added 'darkplex' CLI alias alongside 'cortex'
- Package renamed to darkplex-core v0.2.0
- 405 tests passing (was 234)
- 14 new test files covering all merged modules
2026-02-12 08:43:02 +01:00

701 lines
25 KiB
Python

#!/usr/bin/env python3
"""
Darkplex Loop — The single heartbeat of the intelligence pipeline.
One process. One loop. One state machine.
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
Each cycle:
1. INGEST — Fetch new events from NATS (batch consumer pull)
2. EXTRACT — Pull entities and relationships from events
3. BRIDGE — Sync cortex outputs to knowledge engine
4. VERIFY — Check that real output was produced
5. REPORT — Update state, alert on failure
States:
RUNNING — Everything nominal
DEGRADED — A step failed, but loop continues with recovery attempts
EMERGENCY — Critical failure, alerting
Usage:
darkplex loop # Run loop (default: 1h cycle)
darkplex loop --once # Single cycle, then exit
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
darkplex loop --status # Print current state and exit
darkplex loop --check # Check for new events, exit 0=new 1=none
"""
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
# ── Paths (configurable via env) ─────────────────────────────────────────────
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
SCRIPT_DIR = BASE_DIR / "scripts"
LEVEL4_DIR = SCRIPT_DIR / "level4"
LOG_DIR = BASE_DIR / "logs"
# Persistent loop state (JSON mirror of LoopState.__dict__).
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
# ── NATS event source (stream/consumer names, pull batch size) ───────────────
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))
DEFAULT_CYCLE_SECONDS = 3600  # 1 hour
ALERT_COOLDOWN = 3600  # 1 alert per hour max
log = logging.getLogger("darkplex-loop")
# ── State Machine ────────────────────────────────────────────────────────────
class LoopState:
    """Persistent state for the Darkplex Loop.

    All public attributes are mirrored to STATE_FILE as JSON so state
    survives process restarts. Keys in the file that no longer exist as
    attributes are ignored on load.
    """

    def __init__(self):
        self.status = "INIT"  # RUNNING / DEGRADED / EMERGENCY ("INIT" until first cycle)
        self.cycle_count = 0  # total cycles attempted (success or failure)
        self.last_cycle = None  # ISO timestamp of the most recent cycle
        self.last_success = None
        self.last_failure = None
        self.last_alert = None  # ISO timestamp of last alert; anchors the cooldown
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}  # per-step results from the last successful cycle
        self.error = None  # "step: message" from the last failure
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self._load()

    def _load(self):
        """Restore attributes from STATE_FILE; start fresh on any read/parse problem."""
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                if hasattr(self, k):  # skip stale keys from older schema versions
                    setattr(self, k, v)
        except (OSError, json.JSONDecodeError):
            # OSError covers FileNotFoundError plus permission/IO errors —
            # any of them simply means we begin with default state.
            pass

    def save(self):
        """Persist all attributes as JSON — atomically.

        Writes to a temp file in the same directory, then renames over
        STATE_FILE, so a crash mid-write cannot leave a truncated file
        (the previous non-atomic write_text could).
        """
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        tmp = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
        tmp.write_text(json.dumps(self.__dict__, indent=2, default=str))
        tmp.replace(STATE_FILE)  # atomic rename on POSIX, replaces existing file

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle (retains the last 10)."""
        self.perf = perf
        self.perf_history.append(perf)
        self.perf_history = self.perf_history[-10:]

    def perf_averages(self) -> dict:
        """Running averages (integer ms) over the retained perf history."""
        if not self.perf_history:
            return {}
        n = len(self.perf_history)
        keys = self.perf_history[0].keys()
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / n) for k in keys}

    def record_success(self, step_results: dict):
        """Mark a successful cycle: reset the failure streak and persist."""
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        """Mark a failed cycle; escalate to EMERGENCY at 3 consecutive failures."""
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        self.status = "EMERGENCY" if self.consecutive_failures >= 3 else "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """True when no alert was sent yet or the ALERT_COOLDOWN window has elapsed."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            return True  # unparseable timestamp — fail open and allow the alert

    def mark_alerted(self):
        """Record the alert time (cooldown anchor) and persist."""
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
# ── Pipeline Steps ───────────────────────────────────────────────────────────
def _nats_cmd():
"""Build NATS CLI base command with auth."""
nats_bin = os.environ.get("NATS_BIN", "nats")
nats_url = os.environ.get("NATS_URL", "")
if nats_url:
return [nats_bin, "-s", nats_url]
return [nats_bin]
def check_new_events() -> int:
    """Return number of pending events in the consumer. 0 = nothing new.

    Returns -1 when the NATS CLI fails, times out, or its output cannot
    be parsed.
    """
    cmd = _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return -1
        return json.loads(proc.stdout).get("num_pending", 0)
    except Exception as e:
        log.warning(f"check_new_events failed: {e}")
        return -1
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns a dict with:
      events        — list of parsed event dicts (one JSON object per line)
      total_scanned — messages fetched, including unparseable ones
      skipped       — messages dropped due to parse errors
      skip_reason   — present only when the cycle is skipped early
    Falls back to _step_ingest_sequential() when the batch pull fails
    or times out.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    # Check how many pending
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")
    events = []
    total_fetched = 0
    parse_errors = 0
    # Fetch in batches. pending == -1 means the pending check failed;
    # attempt a full batch anyway rather than stalling the cycle.
    remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        batch_size = min(remaining, NATS_BATCH_SIZE)
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)
        # --raw emits one message payload per line; each is expected to be JSON.
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1
    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)
    # Update sequence tracking (get current stream seq from consumer info)
    # so the sequential fallback knows where to resume next time.
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        # Best-effort bookkeeping — losing the marker only widens the
        # fallback's scan window, it does not lose events.
        log.warning("Could not save last processed sequence")
    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Reads messages one sequence number at a time, starting just after the
    last processed sequence but never scanning more than NATS_BATCH_SIZE
    messages back. Only `conversation_message_in` subjects are kept.
    """
    import base64  # stream-get returns message data base64-encoded
    log.info("INGEST FALLBACK: Sequential fetch")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        pass  # missing/corrupt marker just means start from the window edge
    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}
    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    # Bound the scan to the last NATS_BATCH_SIZE messages of the stream.
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)
    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            # Only conversation events feed the extractor; skip the rest.
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1  # deleted sequence, timeout, or bad payload — count and move on
    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass  # best-effort marker update
    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Loads the level4 entity-manager module by file path (its filename
    contains a dash, so it cannot be imported normally), then extracts
    entities either via a batched LLM extractor or the regex fallback.
    Persists the updated entity/relationship stores and mirrors the
    counters onto *state*.

    Returns {"extracted", "new_entities", "new_relationships"}.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")
    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    # entity-manager.py has a dash in its filename — load it via spec.
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)
    # Try LLM batch extraction first
    from llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")
    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    # NOTE(review): local time here, while LoopState timestamps use UTC — confirm intended.
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    # Prepare texts for potential batch LLM processing
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        # Text may arrive as a list of content parts (dicts or plain strings).
        if isinstance(text, list):
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))
    # LLM batch extraction for qualifying texts
    llm_results = {}
    if llm_ok:
        # Only texts at/above the importance threshold are worth an LLM call.
        batch_texts = [t for t, s in event_texts if t and s >= 0.4]
        if batch_texts:
            consecutive_fails = 0
            # Chunks of 10 texts per call; bail to regex after 3 failed chunks.
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
            if llm_results:
                log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")
    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        if not text or score < 0.4:
            continue
        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Add LLM entities that appear in this text
            text_lower = text.lower()
            for name, info in llm_results.items():
                # Loose matching: allow dash/space/no-dash name variants.
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                # Make the new entity matchable for later events in this batch.
                known[name] = entities[name]
        # Pairwise co-occurrence relationships; each name pairs with at most
        # the next 4 names to bound the quadratic blowup on large events.
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1
    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)
    # Mirror counters onto persistent state for status reporting.
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)
    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
def step_bridge(state: LoopState) -> dict:
    """Step 3: Run knowledge bridge."""
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")
    script = SCRIPT_DIR / "knowledge-bridge.py"
    if not script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}
    proc = subprocess.run(
        [sys.executable, str(script), "sync"],
        capture_output=True, text=True, timeout=120,
    )
    if proc.returncode != 0:
        log.warning(f"BRIDGE: Failed — {proc.stderr[:200]}")
        return {"status": "failed", "error": proc.stderr[:200]}
    # Sum every "<count> new/bridged/added" figure the bridge printed.
    pattern = re.compile(r"(\d+)\s+(?:new|bridged|added)", re.I)
    bridged = 0
    for line in proc.stdout.split("\n"):
        match = pattern.search(line)
        if match:
            bridged += int(match.group(1))
    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: Verify output quality.

    Checks that the knowledge files exist, parse, and are non-empty; that
    extraction produced output when events were processed; and that NATS
    is reachable. Returns {"verdict": "PASS"|"FAIL", "issues": [...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")
    issues = []
    for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]:
        if not f.exists():
            issues.append(f"{label} file missing")
        else:
            try:
                data = json.loads(f.read_text())
                if not data:
                    issues.append(f"{label} file is empty")
            except json.JSONDecodeError:
                issues.append(f"{label} file is corrupt JSON")
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")
    # Use _nats_cmd() so NATS_BIN / NATS_URL settings apply here too
    # (previously a bare "nats" call, bypassing the configured CLI/auth).
    try:
        r = subprocess.run(
            _nats_cmd() + ["stream", "ls", "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")
    verdict = "PASS" if not issues else "FAIL"
    # Separator restored: the log line previously ran verdict and count together.
    log.info(f"VERIFY: {verdict} — {len(issues)} issues")
    for issue in issues:
        log.warning(issue)
    return {"verdict": verdict, "issues": issues}
def step_report(state: LoopState, verify_result: dict):
    """Step 5: Alert if degraded/emergency.

    Sends at most one alert per ALERT_COOLDOWN window (enforced by
    state.can_alert) and drops a flag file for external monitors.
    """
    if state.status == "RUNNING":
        return
    if not state.can_alert():
        log.info("REPORT: Alert cooldown active, skipping")
        return
    severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
    msg = (
        f"Darkplex Loop {severity}\n"
        f"Consecutive failures: {state.consecutive_failures}\n"
        f"Error: {state.error}\n"
        f"Issues: {', '.join(verify_result.get('issues', []))}"
    )
    log.warning(f"REPORT: Sending alert — {state.status}")
    try:
        # sys.executable instead of hard-coded "python3": consistent with
        # step_bridge and correct inside a venv.
        subprocess.run(
            [sys.executable, str(SCRIPT_DIR / "vera-alert.py"), msg],
            capture_output=True, text=True, timeout=15,
        )
    except Exception:
        pass  # alerting is best-effort; never let it kill the loop
    # Flag file lets external monitors notice the alert state.
    flag = LOG_DIR / "darkplex-loop-alert.flag"
    flag.parent.mkdir(parents=True, exist_ok=True)  # LOG_DIR may not exist yet
    flag.write_text(f"{datetime.now().isoformat()} {state.status}: {state.error}")
    state.mark_alerted()
# ── Helpers ──────────────────────────────────────────────────────────────────
def _importance(text: str) -> float:
"""Importance scoring for event text."""
if not text:
return 0.0
score = 0.3
if len(text) > 200: score += 0.1
if len(text) > 500: score += 0.1
caps = len(re.findall(r"\b[A-Z][a-z]+\b", text))
if caps > 3: score += 0.1
if caps > 8: score += 0.1
for p in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
if p.lower() in text.lower():
score -= 0.3
for w in ["meeting", "project", "company", "contract", "decision", "strategy",
"budget", "deadline", "milestone", "partnership", "investment", "revenue",
"client", "proposal", "agreement"]:
if w in text.lower():
score += 0.05
return max(0.0, min(1.0, score))
def print_status():
    """Print the persisted loop state and knowledge-store counts to stdout."""
    state = LoopState()
    ent_count = rel_count = 0
    try:
        ent_count = len(json.loads(ENTITIES_FILE.read_text()))
    except Exception:
        pass  # missing/corrupt store simply shows as 0
    try:
        rel_count = len(json.loads(RELATIONSHIPS_FILE.read_text()))
    except Exception:
        pass
    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "")
    print(f"{icon} Status: {state.status}")
    print(f"Cycles: {state.cycle_count}")
    print(f"Last cycle: {state.last_cycle or 'never'}")
    print(f"Last success: {state.last_success or 'never'}")
    print(f"Last failure: {state.last_failure or 'never'}")
    print(f"Failures: {state.consecutive_failures}")
    print(f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)")
    # Fixed missing space after the colon (was "Relationships:{rel_count}").
    print(f"Relationships: {rel_count} total")
    if state.error:
        print(f"Error: {state.error}")
# ── Main Loop ────────────────────────────────────────────────────────────────
def _ms_since(t0: float) -> int:
return int((time.monotonic() - t0) * 1000)
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success.

    Order: INGEST → EXTRACT → BRIDGE → VERIFY → (REPORT on failure),
    with per-step timings recorded into the state's perf history.
    A cycle with no new events is skipped early and counts as success.
    """
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()
    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}
        # Early skip if no new events
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True
        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract
        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge
        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Only structural issues (broken/missing/corrupt) fail the cycle;
        # transient ones (e.g. NATS unreachable) do not.
        if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]):
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False
        state.record_success(step_results)
        avgs = state.perf_averages()
        # Separator restored between status and total_ms (was run together).
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status} — {perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")
        # Clear any standing alert flag now that a cycle has succeeded.
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()
        return True
    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Attribute the failure to the first step that produced no result.
        step_name = "unknown"
        for name in ["ingest", "extract", "bridge", "verify"]:
            if name not in step_results:
                step_name = name
                break
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
def main():
    """CLI entry point for `darkplex loop`.

    Flags: --status (print state), --check (exit 0=new/1=none/2=error),
    --once (single cycle), --cycle <seconds> (interval override).
    """
    # Create the log directory BEFORE configuring logging: FileHandler
    # raises FileNotFoundError if the parent directory does not exist,
    # so the previous order crashed on a fresh workspace.
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )
    args = sys.argv[1:]
    if "--status" in args:
        print_status()
        return
    if "--check" in args:
        # Exit codes: 0 = new events pending, 1 = none, 2 = check failed.
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)
    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS
    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])
    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")
    running = True

    def handle_signal(sig, frame):
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    while running:
        run_cycle(state)
        if once:
            break
        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1-second slices so a shutdown signal is honored promptly.
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)
    log.info("Darkplex Loop stopped")