darkplex-core/cortex/intelligence/loop.py
Claudia 0484c6321a
Some checks failed
Tests / test (push) Failing after 5s
feat(memory): add session memory persistence module
New cortex/memory/ module that provides:
- boot_assembler: builds BOOTSTRAP.md from threads, decisions, narrative
- thread_tracker: tracks conversation threads across sessions via NATS
- narrative_generator: daily narrative with Ollama LLM (fallback: structured)
- pre_compaction: snapshot pipeline before context compaction

CLI commands:
- cortex memory bootstrap [--dry-run] [--workspace DIR]
- cortex memory snapshot [--workspace DIR]
- cortex memory threads [--summary] [--hours N]

All paths configurable via WORKSPACE_DIR, NATS_URL, AGENT_NAME env vars.
No hardcoded paths. Works with any OpenClaw agent.

Fixes array/dict handling for empty threads.json and decisions.json.
2026-02-13 11:52:25 +01:00

830 lines
30 KiB
Python

#!/usr/bin/env python3
"""
Darkplex Loop — The single heartbeat of the intelligence pipeline.
One process. One loop. One state machine.
Replaces: cron-smart-extractor, knowledge-bridge, knowledge-ingest, pipeline-health.
Each cycle:
1. INGEST — Fetch new events from NATS (batch consumer pull)
2. EXTRACT — Pull entities and relationships from events
3. BRIDGE — Sync cortex outputs to knowledge engine
4. VERIFY — Check that real output was produced
5. REPORT — Update state, alert on failure
States:
RUNNING — Everything nominal
DEGRADED — A step failed, but loop continues with recovery attempts
EMERGENCY — Critical failure, alerting
Usage:
darkplex loop # Run loop (default: 1h cycle)
darkplex loop --once # Single cycle, then exit
darkplex loop --cycle 3600 # Custom cycle interval (seconds)
darkplex loop --status # Print current state and exit
darkplex loop --check # Check for new events, exit 0=new 1=none
"""
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
import urllib.request
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
# ── Paths (configurable via env) ─────────────────────────────────────────────
# Workspace root; defaults to ~/clawd when DARKPLEX_WORKSPACE is unset.
BASE_DIR = Path(os.environ.get("DARKPLEX_WORKSPACE", Path.home() / "clawd"))
SCRIPT_DIR = BASE_DIR / "scripts"
LEVEL4_DIR = SCRIPT_DIR / "level4"  # entity-manager.py lives here (loaded dynamically in step_extract)
LOG_DIR = BASE_DIR / "logs"
STATE_FILE = BASE_DIR / "memory" / "darkplex-loop-state.json"  # persisted LoopState JSON
KNOWLEDGE_DIR = Path(os.environ.get("DARKPLEX_KNOWLEDGE_DIR", Path.home() / ".cortex" / "knowledge"))
ENTITIES_FILE = KNOWLEDGE_DIR / "entities.json"
RELATIONSHIPS_FILE = KNOWLEDGE_DIR / "relationships.json"
# NATS stream/consumer used for batch event ingestion.
NATS_STREAM = os.environ.get("DARKPLEX_NATS_STREAM", "openclaw-events")
NATS_CONSUMER = os.environ.get("DARKPLEX_NATS_CONSUMER", "darkplex-loop")
NATS_BATCH_SIZE = int(os.environ.get("DARKPLEX_NATS_BATCH", "2000"))  # max events fetched per cycle
DEFAULT_CYCLE_SECONDS = 3600 # 1 hour
ALERT_COOLDOWN = 3600 # 1 alert per hour max
log = logging.getLogger("darkplex-loop")
# ── State Machine ────────────────────────────────────────────────────────────
class LoopState:
    """Persistent state for the Darkplex Loop.

    Every attribute is a JSON-serializable value so the whole instance can
    be round-tripped through STATE_FILE via ``__dict__`` (see save/_load).
    """

    def __init__(self):
        self.status = "INIT"            # INIT | RUNNING | DEGRADED | EMERGENCY
        self.cycle_count = 0
        self.last_cycle = None          # ISO timestamp of most recent cycle (success or failure)
        self.last_success = None
        self.last_failure = None
        self.last_alert = None          # ISO timestamp; drives the alert cooldown
        self.consecutive_failures = 0
        self.entities_total = 0
        self.relationships_total = 0
        self.entities_extracted_last = 0
        self.entities_new_last = 0
        self.events_processed_last = 0
        self.steps = {}                 # per-step results of the last cycle
        self.error = None
        self.perf = {}  # last cycle: ingest_ms, extract_ms, bridge_ms, verify_ms, total_ms
        self.perf_history = []  # last 10 cycles [{total_ms, ingest_ms, ...}]
        self.quality_metrics = {}  # {unknown_rate, llm_success_rate, avg_entities_per_event}
        self.quality_history = []  # last 10: [{cycle, unknown_rate, llm_success_rate}]
        self.ollama_status = "unknown"  # healthy|degraded|down
        self._load()

    def _load(self):
        """Restore persisted fields from STATE_FILE; start fresh if absent/corrupt."""
        try:
            data = json.loads(STATE_FILE.read_text())
            for k, v in data.items():
                if hasattr(self, k):  # ignore keys from older/newer schema versions
                    setattr(self, k, v)
        except (FileNotFoundError, json.JSONDecodeError):
            pass

    def save(self):
        """Persist the full state as pretty-printed JSON."""
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        STATE_FILE.write_text(json.dumps(self.__dict__, indent=2, default=str))

    def record_perf(self, perf: dict):
        """Record performance metrics for this cycle.

        FIX: the caller's dict is copied before ``unknown_rate`` is folded
        in — the original mutated the argument in place.
        """
        entry = dict(perf)
        # Include unknown_rate in perf_history if quality metrics are available
        if self.quality_metrics:
            entry["unknown_rate"] = self.quality_metrics.get("unknown_rate", 0)
        self.perf = entry
        self.perf_history.append(entry)
        self.perf_history = self.perf_history[-10:]  # keep last 10

    def perf_averages(self) -> dict:
        """Running averages over the last 10 cycles.

        FIX: averages every key seen in ANY history entry (missing keys
        count as 0). The original used only the first entry's keys, so
        metrics added mid-run (e.g. unknown_rate) were silently dropped.
        """
        if not self.perf_history:
            return {}
        keys = set().union(*(p.keys() for p in self.perf_history))
        n = len(self.perf_history)
        return {k: int(sum(p.get(k, 0) for p in self.perf_history) / n) for k in keys}

    def record_success(self, step_results: dict):
        """Mark a clean cycle: reset failure counters and persist."""
        self.status = "RUNNING"
        self.consecutive_failures = 0
        self.last_success = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_success
        self.cycle_count += 1
        self.steps = step_results
        self.error = None
        self.save()

    def record_failure(self, step: str, error: str):
        """Mark a failed cycle; escalate to EMERGENCY after 3 consecutive failures."""
        self.consecutive_failures += 1
        self.last_failure = datetime.now(timezone.utc).isoformat()
        self.last_cycle = self.last_failure
        self.cycle_count += 1
        self.error = f"{step}: {error}"
        if self.consecutive_failures >= 3:
            self.status = "EMERGENCY"
        else:
            self.status = "DEGRADED"
        self.save()

    def can_alert(self) -> bool:
        """True when the ALERT_COOLDOWN window since the last alert has elapsed."""
        if not self.last_alert:
            return True
        try:
            last = datetime.fromisoformat(self.last_alert)
            return (datetime.now(timezone.utc) - last).total_seconds() > ALERT_COOLDOWN
        except (ValueError, TypeError):
            # Unparseable timestamp — fail open so alerts are never lost
            return True

    def mark_alerted(self):
        """Record the alert timestamp and persist (starts the cooldown)."""
        self.last_alert = datetime.now(timezone.utc).isoformat()
        self.save()
# ── Pipeline Steps ───────────────────────────────────────────────────────────
def _nats_cmd():
"""Build NATS CLI base command with auth."""
nats_bin = os.environ.get("NATS_BIN", "nats")
nats_url = os.environ.get("NATS_URL", "")
if nats_url:
return [nats_bin, "-s", nats_url]
return [nats_bin]
def check_new_events() -> int:
    """Return number of pending events in the consumer. 0 = nothing new, -1 = check failed."""
    try:
        proc = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if proc.returncode != 0:
            return -1
        return json.loads(proc.stdout).get("num_pending", 0)
    except Exception as e:
        log.warning(f"check_new_events failed: {e}")
        return -1
def step_ingest(state: LoopState) -> dict:
    """Step 1: Fetch new events from NATS using batch consumer pull.

    Returns a dict with:
      events        — list of parsed JSON event payloads
      total_scanned — number of message lines considered
      skipped       — lines that failed JSON parsing
      skip_reason   — present only when the cycle is skipped ("no_new_events")
    Falls back to _step_ingest_sequential() on batch-fetch failure/timeout.
    """
    log.info("STEP 1: INGEST — Fetching events from NATS")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    # Check how many pending
    pending = check_new_events()
    if pending == 0:
        log.info("INGEST: No new events — skipping cycle")
        return {"events": [], "total_scanned": 0, "skipped": 0, "skip_reason": "no_new_events"}
    log.info(f"INGEST: {pending} pending events in consumer")
    events = []
    total_fetched = 0
    parse_errors = 0
    # Fetch in batches; pending == -1 means the check failed, so try a full batch anyway
    remaining = min(pending, NATS_BATCH_SIZE) if pending > 0 else NATS_BATCH_SIZE
    try:
        batch_size = min(remaining, NATS_BATCH_SIZE)
        result = subprocess.run(
            _nats_cmd() + ["consumer", "next", NATS_STREAM, NATS_CONSUMER,
                           "--count", str(batch_size), "--raw"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            log.warning(f"Batch fetch failed (rc={result.returncode}), falling back to sequential")
            return _step_ingest_sequential(state)
        # --raw output: one JSON message per line
        for line in result.stdout.strip().split("\n"):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                events.append(data)
                total_fetched += 1
            except json.JSONDecodeError:
                parse_errors += 1
    except subprocess.TimeoutExpired:
        log.warning("Batch fetch timed out, falling back to sequential")
        return _step_ingest_sequential(state)
    # Update sequence tracking (get current stream seq from consumer info)
    try:
        r = subprocess.run(
            _nats_cmd() + ["consumer", "info", NATS_STREAM, NATS_CONSUMER, "--json"],
            capture_output=True, text=True, timeout=10,
        )
        if r.returncode == 0:
            info = json.loads(r.stdout)
            stream_seq = info["delivered"]["stream_seq"]
            last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
            last_processed_seq_file.write_text(json.dumps({"last_seq": stream_seq}))
    except Exception:
        # Best-effort: losing the seq marker only affects the sequential fallback
        log.warning("Could not save last processed sequence")
    log.info(f"INGEST: {len(events)} events fetched in batch ({parse_errors} parse errors)")
    return {"events": events, "total_scanned": total_fetched + parse_errors, "skipped": parse_errors}
def _step_ingest_sequential(state: LoopState) -> dict:
    """Fallback: sequential fetch via stream get (slow but reliable).

    Walks stream sequence numbers one message at a time, keeping only
    conversation_message_in subjects, then records the last sequence seen.
    """
    import base64
    log.info("INGEST FALLBACK: Sequential fetch")
    last_processed_seq_file = BASE_DIR / "memory" / "darkplex-last-processed-seq.json"
    last_processed_seq = 0
    try:
        if last_processed_seq_file.exists():
            last_processed_seq = json.loads(last_processed_seq_file.read_text()).get("last_seq", 0)
    except Exception:
        pass  # best-effort: missing/corrupt marker just means we scan further back
    r = subprocess.run(
        _nats_cmd() + ["stream", "info", NATS_STREAM, "--json"],
        capture_output=True, text=True, timeout=10,
    )
    if r.returncode != 0:
        return {"events": [], "total_scanned": 0, "skipped": 0}
    info = json.loads(r.stdout)
    end_seq = info["state"]["last_seq"]
    # Never scan more than NATS_BATCH_SIZE messages back from the stream head
    start_seq = max(last_processed_seq + 1, end_seq - NATS_BATCH_SIZE)
    events = []
    skipped = 0
    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                _nats_cmd() + ["stream", "get", NATS_STREAM, str(seq), "--json"],
                capture_output=True, text=True, timeout=5,
            )
            if result.returncode != 0:
                skipped += 1
                continue
            msg = json.loads(result.stdout)
            if "conversation_message_in" not in msg.get("subject", ""):
                skipped += 1
                continue
            # Message payload arrives base64-encoded; decode then parse as JSON
            data = json.loads(base64.b64decode(msg["data"]).decode("utf-8"))
            events.append(data)
        except Exception:
            skipped += 1
    try:
        last_processed_seq_file.parent.mkdir(parents=True, exist_ok=True)
        last_processed_seq_file.write_text(json.dumps({"last_seq": end_seq}))
    except Exception:
        pass  # best-effort marker write
    log.info(f"INGEST (sequential): {len(events)} events (scanned {end_seq - start_seq + 1}, skipped {skipped})")
    return {"events": events, "total_scanned": end_seq - start_seq + 1, "skipped": skipped}
def step_extract(state: LoopState, events: list) -> dict:
    """Step 2: Extract entities and relationships from events.

    Loads the entity-manager module from LEVEL4_DIR by path, scores each
    event text with _importance(), runs LLM batch extraction (Ollama) on the
    top-scoring texts when available (regex fallback otherwise), then merges
    new entities and co-occurrence relationships into the knowledge files.
    Returns {"extracted", "new_entities", "new_relationships"} counts.
    """
    log.info(f"STEP 2: EXTRACT — Processing {len(events)} events")
    if not events:
        log.info("EXTRACT: No events to process")
        return {"extracted": 0, "new_entities": 0, "new_relationships": 0}
    sys.path.insert(0, str(LEVEL4_DIR))
    import importlib.util
    # entity-manager.py has a hyphen, so it must be loaded by file path
    spec = importlib.util.spec_from_file_location("entity_manager", LEVEL4_DIR / "entity-manager.py")
    em = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(em)
    # Try LLM batch extraction first
    from cortex.llm_extractor import extract_entities_llm_batch, is_available as llm_available
    use_llm = os.environ.get("DARKPLEX_EXTRACTOR", "auto").lower() in ("llm", "auto")
    llm_ok = use_llm and llm_available()
    if llm_ok:
        log.info("EXTRACT: Using LLM extractor (Ollama)")
    else:
        log.info("EXTRACT: Using regex extractor (fallback)")
    known = em.load_known_entities()
    entities = em.load_json(ENTITIES_FILE)
    relationships = em.load_json(RELATIONSHIPS_FILE)
    total_extracted = 0
    new_entities = 0
    new_relationships = 0
    ts_now = time.strftime("%Y-%m-%dT%H:%M:%S")
    # Prepare texts for potential batch LLM processing
    event_texts = []
    for event in events:
        payload = event.get("payload", {})
        text = payload.get("text_preview", "") or payload.get("text", "")
        # Text may be a list of content parts (dicts with "text" or plain values)
        if isinstance(text, list):
            parts = []
            for t in text:
                parts.append(t.get("text", "") if isinstance(t, dict) else str(t))
            text = " ".join(parts)
        if not isinstance(text, str):
            text = str(text)
        score = _importance(text) if text else 0.0
        event_texts.append((text, score))
    # LLM batch extraction for qualifying texts (cap at 50 to keep cycle time reasonable)
    llm_results = {}
    if llm_ok:
        batch_texts = [t for t, s in sorted(
            [(t, s) for t, s in event_texts if t and s >= 0.4],
            key=lambda x: -x[1]  # highest importance first
        )][:50]
        if batch_texts:
            consecutive_fails = 0
            # Process in chunks of 10 so one bad batch doesn't sink everything
            for i in range(0, len(batch_texts), 10):
                if consecutive_fails >= 3:
                    log.warning("EXTRACT: 3 consecutive LLM failures, falling back to regex")
                    llm_ok = False
                    break
                chunk = batch_texts[i:i+10]
                batch_result = extract_entities_llm_batch(chunk)
                if batch_result:
                    llm_results.update(batch_result)
                    consecutive_fails = 0
                else:
                    consecutive_fails += 1
        if llm_results:
            log.info(f"EXTRACT: LLM batch found {len(llm_results)} entities")
    for idx, event in enumerate(events):
        text, score = event_texts[idx]
        if not text or score < 0.4:
            continue
        if llm_ok and llm_results:
            # Use LLM results + known entity matching
            found = em._extract_known(text, known) if hasattr(em, '_extract_known') else {}
            # Add LLM entities that appear in this text
            text_lower = text.lower()
            for name, info in llm_results.items():
                variants = [name, name.replace("-", " "), name.replace("-", "")]
                if any(v in text_lower for v in variants if len(v) > 2):
                    found[name] = info
        else:
            found = em.extract_entities(text, known)
        if not found:
            continue
        total_extracted += len(found)
        names = list(found.keys())
        for name, info in found.items():
            if name not in entities:
                entities[name] = {
                    "type": info["type"],
                    "source": "darkplex-loop",
                    "first_seen": ts_now,
                }
                new_entities += 1
                # Newly discovered entity becomes "known" for later events in this batch
                known[name] = entities[name]
        # Co-occurrence relationships: each entity pairs with up to 4 neighbors
        if len(names) >= 2:
            for i in range(len(names)):
                for j in range(i + 1, min(len(names), i + 5)):
                    # Canonical ordering so (a, b) and (b, a) share one key
                    a, b = min(names[i], names[j]), max(names[i], names[j])
                    key = f"{a}::{b}"
                    if key in relationships:
                        relationships[key]["count"] = relationships[key].get("count", 1) + 1
                        relationships[key]["last_seen"] = ts_now
                    else:
                        relationships[key] = {
                            "a": a, "b": b, "types": ["co-occurrence"],
                            "count": 1, "first_seen": ts_now, "last_seen": ts_now,
                        }
                        new_relationships += 1
    em.save_json(ENTITIES_FILE, entities)
    em.save_json(RELATIONSHIPS_FILE, relationships)
    state.entities_total = len(entities)
    state.relationships_total = len(relationships)
    state.entities_extracted_last = total_extracted
    state.entities_new_last = new_entities
    state.events_processed_last = len(events)
    log.info(f"EXTRACT: {total_extracted} entities ({new_entities} new), {new_relationships} new relationships")
    return {"extracted": total_extracted, "new_entities": new_entities, "new_relationships": new_relationships}
def step_bridge(state: LoopState) -> dict:
    """Step 3: Run knowledge bridge (knowledge-bridge.py sync as a subprocess)."""
    log.info("STEP 3: BRIDGE — Syncing cortex outputs")
    bridge_script = SCRIPT_DIR / "knowledge-bridge.py"
    if not bridge_script.exists():
        log.warning("BRIDGE: knowledge-bridge.py not found, skipping")
        return {"status": "skipped", "reason": "script not found"}
    proc = subprocess.run(
        [sys.executable, str(bridge_script), "sync"],
        capture_output=True, text=True, timeout=120,
    )
    if proc.returncode != 0:
        log.warning(f"BRIDGE: Failed — {proc.stderr[:200]}")
        return {"status": "failed", "error": proc.stderr[:200]}
    # Sum counts the bridge reports on stdout, e.g. "12 new" / "3 bridged" / "5 added"
    pattern = re.compile(r"(\d+)\s+(?:new|bridged|added)", re.I)
    matches = (pattern.search(line) for line in proc.stdout.split("\n"))
    bridged = sum(int(m.group(1)) for m in matches if m)
    log.info(f"BRIDGE: {bridged} items bridged")
    return {"status": "ok", "bridged": bridged}
def _check_quality(state: LoopState, extract_result: dict) -> list:
    """Check entity quality metrics. Returns list of issues/warnings.

    Computes the unknown-entity rate from the entities file, an average
    entities-per-event figure, and a coarse LLM success proxy; stores them
    on state and flags high or steadily rising unknown rates.
    """
    issues = []
    # Load entities and compute unknown_rate
    try:
        entities = json.loads(ENTITIES_FILE.read_text()) if ENTITIES_FILE.exists() else {}
    except (json.JSONDecodeError, OSError):
        entities = {}
    total = len(entities)
    unknown_count = sum(1 for e in entities.values() if e.get("type") == "unknown")
    unknown_rate = (unknown_count / total * 100) if total > 0 else 0.0
    events_processed = state.events_processed_last or 1  # guard divide-by-zero
    extracted = extract_result.get("extracted", 0)
    avg_entities_per_event = extracted / events_processed if events_processed > 0 else 0.0
    # Estimate LLM success rate from extraction (if LLM was used, new_entities > 0 is a proxy)
    llm_success_rate = 100.0 # default if no LLM used
    # We track this per-cycle based on whether extraction produced results
    if events_processed > 10 and extracted == 0:
        llm_success_rate = 0.0
    state.quality_metrics = {
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
        "avg_entities_per_event": round(avg_entities_per_event, 2),
    }
    if unknown_rate > 30:
        issues.append(f"High unknown entity rate: {unknown_rate:.1f}% ({unknown_count}/{total})")
    # Track quality history and detect trends
    state.quality_history.append({
        "cycle": state.cycle_count + 1,  # cycle_count is incremented after verify runs
        "unknown_rate": round(unknown_rate, 1),
        "llm_success_rate": round(llm_success_rate, 1),
    })
    state.quality_history = state.quality_history[-10:] # keep last 10
    # Check if unknown_rate rising 3 cycles in a row
    if len(state.quality_history) >= 3:
        last3 = [h["unknown_rate"] for h in state.quality_history[-3:]]
        if last3[0] < last3[1] < last3[2]:
            issues.append(f"Entity quality degrading — unknown_rate rising: {last3}")
    log.info(f"VERIFY/QUALITY: unknown_rate={unknown_rate:.1f}%, avg_entities/event={avg_entities_per_event:.2f}")
    return issues
def _check_ollama(state: LoopState) -> list:
    """Check Ollama health via the local /api/tags endpoint. Returns list of issues."""
    issues = []
    model = os.environ.get("DARKPLEX_OLLAMA_MODEL", os.environ.get("OLLAMA_MODEL", ""))
    try:
        req = urllib.request.Request("http://localhost:11434/api/tags", method="GET")
        with urllib.request.urlopen(req, timeout=5) as resp:
            payload = json.loads(resp.read())
            available = [entry.get("name", "") for entry in payload.get("models", [])]
            # Server reachable but the configured model is missing → degraded
            if model and not any(model in name for name in available):
                state.ollama_status = "degraded"
                issues.append(f"Ollama up but model '{model}' not loaded (available: {available[:5]})")
                log.warning(f"VERIFY/OLLAMA: degraded — model '{model}' not in {available[:5]}")
            else:
                state.ollama_status = "healthy"
                log.info(f"VERIFY/OLLAMA: healthy ({len(available)} models)")
    except Exception as e:
        state.ollama_status = "down"
        issues.append(f"Ollama down: {e}")
        log.warning(f"VERIFY/OLLAMA: down — {e}")
    return issues
def _check_performance(state: LoopState) -> list:
    """Check for performance regressions against the rolling average. Returns list of issues."""
    issues = []
    if len(state.perf_history) < 2:
        return issues  # need history before regressions are meaningful
    latest = state.perf
    rolling = state.perf_averages()
    curr_total = latest.get("total_ms", 0)
    avg_total = rolling.get("total_ms", 0)
    # A cycle taking more than twice the rolling average is a regression
    if avg_total > 0 and curr_total > 2 * avg_total:
        issues.append(f"Performance regression detected: {curr_total}ms vs avg {avg_total}ms")
    # Extraction alone should never exceed two minutes
    extract_ms = latest.get("extract_ms", 0)
    if extract_ms > 120000:
        issues.append(f"Extraction too slow: {extract_ms}ms (>2min)")
    if issues:
        for issue in issues:
            log.warning(f"VERIFY/PERF: {issue}")
    else:
        log.info(f"VERIFY/PERF: OK (total={curr_total}ms, avg={avg_total}ms)")
    return issues
def step_verify(state: LoopState, extract_result: dict) -> dict:
    """Step 4: Verify output quality.

    Checks knowledge-file integrity, extraction plausibility, NATS
    reachability, plus the quality/Ollama/performance monitors.
    Returns {"verdict": "PASS"|"FAIL", "issues": [str, ...]}.
    """
    log.info("STEP 4: VERIFY — Checking output quality")
    issues = []
    # File integrity checks
    for f, label in [(ENTITIES_FILE, "entities"), (RELATIONSHIPS_FILE, "relationships")]:
        if not f.exists():
            issues.append(f"{label} file missing")
        else:
            try:
                data = json.loads(f.read_text())
                if not data:
                    issues.append(f"{label} file is empty")
            except json.JSONDecodeError:
                issues.append(f"{label} file is corrupt JSON")
    # Many events but zero entities suggests a broken extractor
    events_processed = state.events_processed_last
    extracted = extract_result.get("extracted", 0)
    if events_processed > 10 and extracted == 0:
        issues.append(f"0 entities from {events_processed} events — extraction may be broken")
    # NATS check — FIX: use _nats_cmd() like every other call site so
    # NATS_BIN/NATS_URL overrides apply (was a hardcoded "nats" binary)
    try:
        r = subprocess.run(_nats_cmd() + ["stream", "ls", "--json"], capture_output=True, text=True, timeout=10)
        if r.returncode != 0:
            issues.append("NATS unreachable")
    except Exception as e:
        issues.append(f"NATS check failed: {e}")
    # Quality / Ollama / performance monitors
    issues.extend(_check_quality(state, extract_result))
    issues.extend(_check_ollama(state))
    issues.extend(_check_performance(state))
    verdict = "PASS" if not issues else "FAIL"
    # FIX: separator was missing between verdict and count ("FAIL3 issues")
    log.info(f"VERIFY: {verdict} — {len(issues)} issues")
    for issue in issues:
        log.warning(f"{issue}")
    return {"verdict": verdict, "issues": issues}
def step_report(state: LoopState, verify_result: dict):
"""Step 5: Alert if degraded/emergency."""
if state.status == "RUNNING":
return
if not state.can_alert():
log.info("REPORT: Alert cooldown active, skipping")
return
severity = "🔴 EMERGENCY" if state.status == "EMERGENCY" else "🟡 DEGRADED"
msg = (
f"Darkplex Loop {severity}\n"
f"Consecutive failures: {state.consecutive_failures}\n"
f"Error: {state.error}\n"
f"Issues: {', '.join(verify_result.get('issues', []))}"
)
log.warning(f"REPORT: Sending alert — {state.status}")
try:
subprocess.run(
["python3", str(SCRIPT_DIR / "vera-alert.py"), msg],
capture_output=True, text=True, timeout=15,
)
except Exception:
pass
flag = LOG_DIR / "darkplex-loop-alert.flag"
flag.write_text(f"{datetime.now().isoformat()} {state.status}: {state.error}")
state.mark_alerted()
# ── Helpers ──────────────────────────────────────────────────────────────────
def _importance(text: str) -> float:
"""Importance scoring for event text."""
if not text:
return 0.0
score = 0.3
if len(text) > 200: score += 0.1
if len(text) > 500: score += 0.1
caps = len(re.findall(r"\b[A-Z][a-z]+\b", text))
if caps > 3: score += 0.1
if caps > 8: score += 0.1
for p in ["HEARTBEAT_OK", "heartbeat", "cron:", "health check", "no critical"]:
if p.lower() in text.lower():
score -= 0.3
for w in ["meeting", "project", "company", "contract", "decision", "strategy",
"budget", "deadline", "milestone", "partnership", "investment", "revenue",
"client", "proposal", "agreement"]:
if w in text.lower():
score += 0.05
return max(0.0, min(1.0, score))
def print_status():
    """Print the current loop state to stdout (for `darkplex loop --status`)."""
    state = LoopState()
    # Count entities/relationships directly from the knowledge files;
    # unreadable/missing files simply report 0.
    counts = []
    for path in (ENTITIES_FILE, RELATIONSHIPS_FILE):
        try:
            counts.append(len(json.loads(path.read_text())))
        except Exception:
            counts.append(0)
    ent_count, rel_count = counts
    icon = {"RUNNING": "🟢", "DEGRADED": "🟡", "EMERGENCY": "🔴"}.get(state.status, "")
    print(f"{icon} Status: {state.status}")
    print(f"Cycles: {state.cycle_count}")
    print(f"Last cycle: {state.last_cycle or 'never'}")
    print(f"Last success: {state.last_success or 'never'}")
    print(f"Last failure: {state.last_failure or 'never'}")
    print(f"Failures: {state.consecutive_failures}")
    print(f"Entities: {ent_count} total (last cycle: {state.entities_extracted_last}, {state.entities_new_last} new)")
    print(f"Relationships:{rel_count} total")
    if state.quality_metrics:
        qm = state.quality_metrics
        print(f"Quality: unknown_rate={qm.get('unknown_rate', '?')}% llm_success={qm.get('llm_success_rate', '?')}% avg_ent/event={qm.get('avg_entities_per_event', '?')}")
    print(f"Ollama: {state.ollama_status}")
    if state.perf:
        print(f"Last perf: {state.perf}")
    if state.error:
        print(f"Error: {state.error}")
# ── Main Loop ────────────────────────────────────────────────────────────────
def _ms_since(t0: float) -> int:
return int((time.monotonic() - t0) * 1000)
def run_cycle(state: LoopState) -> bool:
    """Run one complete pipeline cycle. Returns True on success.

    Pipeline: ingest → extract → bridge → verify, with per-step timings
    recorded into state.perf. On exception, the failing step is inferred
    from which step results are missing, and failure is recorded/alerted.
    """
    log.info(f"═══ CYCLE {state.cycle_count + 1} START ═══")
    step_results = {}
    perf = {}
    t_cycle = time.monotonic()
    try:
        t0 = time.monotonic()
        ingest = step_ingest(state)
        perf["ingest_ms"] = _ms_since(t0)
        step_results["ingest"] = {"events": len(ingest["events"]), "scanned": ingest["total_scanned"]}
        # Early skip if no new events
        if ingest.get("skip_reason") == "no_new_events":
            perf["total_ms"] = _ms_since(t_cycle)
            state.record_perf(perf)
            state.save()
            log.info(f"═══ CYCLE SKIPPED (no new events) — {perf['total_ms']}ms ═══")
            return True
        t0 = time.monotonic()
        extract = step_extract(state, ingest["events"])
        perf["extract_ms"] = _ms_since(t0)
        step_results["extract"] = extract
        t0 = time.monotonic()
        bridge = step_bridge(state)
        perf["bridge_ms"] = _ms_since(t0)
        step_results["bridge"] = bridge
        t0 = time.monotonic()
        verify = step_verify(state, extract)
        perf["verify_ms"] = _ms_since(t0)
        step_results["verify"] = verify
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Only hard integrity problems fail the cycle; soft issues (quality,
        # Ollama, perf) are logged but don't flip the state machine.
        if verify["verdict"] == "FAIL" and any("broken" in i or "missing" in i or "corrupt" in i for i in verify["issues"]):
            state.record_failure("verify", "; ".join(verify["issues"]))
            step_report(state, verify)
            return False
        state.record_success(step_results)
        avgs = state.perf_averages()
        # FIX: separator was missing between status and timing in the banner
        log.info(f"═══ CYCLE {state.cycle_count} DONE — {state.status} — {perf['total_ms']}ms (avg {avgs.get('total_ms', '?')}ms) ═══")
        log.info(f" Perf: ingest={perf.get('ingest_ms')}ms extract={perf.get('extract_ms')}ms bridge={perf.get('bridge_ms')}ms verify={perf.get('verify_ms')}ms")
        # Clear any stale alert flag after a clean cycle
        flag = LOG_DIR / "darkplex-loop-alert.flag"
        if flag.exists():
            flag.unlink()
        return True
    except Exception as e:
        perf["total_ms"] = _ms_since(t_cycle)
        state.record_perf(perf)
        # Infer the failing step: first pipeline stage with no recorded result
        step_name = "unknown"
        for name in ["ingest", "extract", "bridge", "verify"]:
            if name not in step_results:
                step_name = name
                break
        log.error(f"CYCLE FAILED at {step_name}: {e}")
        log.error(traceback.format_exc())
        state.record_failure(step_name, str(e)[:300])
        step_report(state, {"issues": [str(e)]})
        return False
def main():
    """CLI entry point for `darkplex loop`.

    Flags: --status, --check (exit 0=new/1=none/2=error), --once,
    --cycle SECONDS. Default: run forever with DEFAULT_CYCLE_SECONDS.
    """
    # FIX: create the log directory BEFORE configuring logging — the
    # original called basicConfig first, and FileHandler raises
    # FileNotFoundError on a fresh install where LOG_DIR doesn't exist yet.
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(LOG_DIR / "darkplex-loop.log"),
            logging.StreamHandler(),
        ],
    )
    args = sys.argv[1:]
    if "--status" in args:
        print_status()
        return
    if "--check" in args:
        pending = check_new_events()
        if pending > 0:
            print(f"NEW: {pending} events pending")
            sys.exit(0)
        elif pending == 0:
            print("NONE: No new events")
            sys.exit(1)
        else:
            print("ERROR: Could not check")
            sys.exit(2)
    once = "--once" in args
    cycle_seconds = DEFAULT_CYCLE_SECONDS
    for i, arg in enumerate(args):
        if arg == "--cycle" and i + 1 < len(args):
            cycle_seconds = int(args[i + 1])
    state = LoopState()
    log.info(f"Darkplex Loop starting — cycle every {cycle_seconds}s, once={once}")
    running = True

    def handle_signal(sig, frame):
        # Flip the flag; the 1s sleep slices below notice it promptly.
        nonlocal running
        log.info("Shutdown signal received")
        running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    while running:
        run_cycle(state)
        if once:
            break
        log.info(f"Sleeping {cycle_seconds}s until next cycle...")
        # Sleep in 1-second slices so shutdown signals interrupt the wait
        for _ in range(cycle_seconds):
            if not running:
                break
            time.sleep(1)
    log.info("Darkplex Loop stopped")