darkplex-core/cortex/memory/boot_assembler.py
Claudia 0484c6321a
Some checks failed
Tests / test (push) Failing after 5s
feat(memory): add session memory persistence module
New cortex/memory/ module that provides:
- boot_assembler: builds BOOTSTRAP.md from threads, decisions, narrative
- thread_tracker: tracks conversation threads across sessions via NATS
- narrative_generator: daily narrative with Ollama LLM (fallback: structured)
- pre_compaction: snapshot pipeline before context compaction

CLI commands:
- cortex memory bootstrap [--dry-run] [--workspace DIR]
- cortex memory snapshot [--workspace DIR]
- cortex memory threads [--summary] [--hours N]

All paths configurable via WORKSPACE_DIR, NATS_URL, AGENT_NAME env vars.
No hardcoded paths. Works with any OpenClaw agent.

Fixes array/dict handling for empty threads.json and decisions.json.
2026-02-13 11:52:25 +01:00

290 lines
10 KiB
Python

"""Boot Assembler — Query-driven boot context generator.
Reads threads, decisions, narrative, and knowledge to assemble a dense BOOTSTRAP.md.
All paths derived from WORKSPACE_DIR. No hardcoded paths.
"""
import json
import os
import subprocess
from datetime import datetime, timezone, timedelta
from pathlib import Path
from .common import (
get_workspace_dir, get_reboot_dir, get_agent_name,
load_json, load_facts,
)
DEFAULT_MAX_CHARS = 16000 # ~4000 tokens
def _load_threads_data(reboot_dir: Path) -> dict:
    """Load threads.json, normalizing a bare list into ``{"threads": [...]}``.

    Returns an empty dict when the file is missing or has an unexpected shape.
    """
    raw = load_json(reboot_dir / "threads.json")
    if isinstance(raw, dict):
        return raw
    if isinstance(raw, list):
        # Legacy format: a bare JSON array of thread objects.
        return {"threads": raw}
    return {}
def _get_open_threads(reboot_dir: Path, limit: int = 7) -> list[dict]:
    """Return up to ``limit`` open threads, ordered by priority then recency.

    Threads with an unknown priority sort as "low".  Fix: a thread whose
    ``last_activity`` is None or malformed no longer raises from
    ``datetime.fromisoformat`` (which would abort the entire boot assembly);
    it now sorts as oldest instead.
    """
    priority_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}

    def _activity_ts(thread: dict) -> float:
        # Timestamps are written as ISO-8601 with a trailing "Z"; tolerate
        # missing or garbled values so one bad entry cannot break boot.
        raw = thread.get("last_activity") or "2000-01-01T00:00:00Z"
        try:
            return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
        except (ValueError, AttributeError, TypeError):
            return 0.0

    data = _load_threads_data(reboot_dir)
    open_threads = [t for t in data.get("threads", []) if t.get("status") == "open"]
    open_threads.sort(key=lambda t: (
        priority_rank.get(t.get("priority", "low"), 3),
        -_activity_ts(t),
    ))
    return open_threads[:limit]
def _integrity_warning(reboot_dir: Path) -> str:
    """Return a staleness warning for the thread data, or "" when it is fresh."""
    integrity = _load_threads_data(reboot_dir).get("integrity", {})
    stamp = integrity.get("last_nats_timestamp")
    if not stamp:
        return "⚠️ No integrity data — thread tracker may not have run yet."
    try:
        # Date-only stamps (YYYY-MM-DD) are interpreted as midnight UTC.
        if len(stamp) <= 10:
            parsed = datetime.strptime(stamp, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        else:
            parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
        age_hours = (datetime.now(timezone.utc) - parsed).total_seconds() / 3600
    except Exception:
        return "⚠️ Could not parse integrity timestamp."
    # Thresholds: >8h is alarming, >2h is a mild heads-up.
    if age_hours > 8:
        return f"🚨 STALE DATA: Thread data is {age_hours:.0f}h old."
    if age_hours > 2:
        return f"⚠️ Data staleness: Thread data is {age_hours:.0f}h old."
    return ""
def _load_hot_snapshot(reboot_dir: Path) -> str:
f = reboot_dir / "hot-snapshot.md"
try:
if not f.exists():
return ""
mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc)
if datetime.now(timezone.utc) - mtime > timedelta(hours=1):
return ""
return f.read_text().strip()[:1000]
except Exception:
return ""
def _load_decisions(reboot_dir: Path) -> list[dict]:
    """Return the last 10 decisions dated within the past 14 days."""
    raw = load_json(reboot_dir / "decisions.json")
    if isinstance(raw, list):
        # Legacy format: a bare JSON array of decision objects.
        raw = {"decisions": raw}
    elif not isinstance(raw, dict):
        return []
    cutoff_date = (datetime.now(timezone.utc) - timedelta(days=14)).strftime("%Y-%m-%d")
    # Dates are ISO YYYY-MM-DD strings, so lexicographic compare == chronological.
    fresh = [d for d in raw.get("decisions", []) if d.get("date", "") >= cutoff_date]
    return fresh[-10:]
def _load_narrative(reboot_dir: Path) -> str:
f = reboot_dir / "narrative.md"
try:
if not f.exists():
return ""
mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc)
if datetime.now(timezone.utc) - mtime > timedelta(hours=36):
return ""
return f.read_text().strip()[:2000]
except Exception:
return ""
def _query_knowledge_for_thread(thread: dict, facts: list[dict]) -> list[str]:
"""Score facts by keyword overlap with thread."""
results = []
query_terms = thread.get("title", "") + " " + thread.get("summary", "")
query_words = set(query_terms.lower().split())
scored = []
for fact in facts:
text = fact.get("text", "").lower()
priority = fact.get("priority", "normal")
boost = {"critical": 4, "high": 2, "normal": 1, "low": 0.5}.get(priority, 1)
overlap = len(query_words & set(text.split()))
if overlap > 0:
score = overlap * boost
created = fact.get("created", "")
if created and created[:10] >= (datetime.now(timezone.utc) - timedelta(days=7)).strftime("%Y-%m-%d"):
score *= 1.5
scored.append((score, fact))
scored.sort(key=lambda x: -x[0])
for score, fact in scored[:3]:
prio = fact.get("priority", "?")
conf = fact.get("confidence", 1.0)
text = fact.get("text", "")[:150]
results.append(f" [{prio}|{conf:.0%}] {text}")
return results[:5]
def _get_execution_mode() -> str:
hour = datetime.now().hour
if 6 <= hour < 12:
return "Morning — brief, directive, efficient"
elif 12 <= hour < 18:
return "Afternoon — execution mode"
elif 18 <= hour < 22:
return "Evening — strategic, philosophical possible"
return "Night — emergencies only"
def assemble(workspace: Path = None, max_chars: int = DEFAULT_MAX_CHARS,
             facts_file: Path = None, calendar_cmd: list[str] = None,
             wellbeing_file: Path = None) -> str:
    """Assemble BOOTSTRAP.md content.

    Sections are appended in a fixed order: header, state (mode/wellbeing/
    mood/integrity/calendar), hot snapshot, narrative, active threads with
    related knowledge, recent decisions, footer.  The result is truncated
    to ``max_chars`` at the end.

    Args:
        workspace: Workspace directory (default: WORKSPACE_DIR or cwd)
        max_chars: Character budget
        facts_file: Path to facts.jsonl for knowledge queries
        calendar_cmd: Command to run for calendar events (optional)
        wellbeing_file: Path to wellbeing.json (optional)

    Returns:
        The complete Markdown briefing as one string.
    """
    ws = workspace or get_workspace_dir()
    reboot_dir = get_reboot_dir(ws)
    agent = get_agent_name()
    now = datetime.now(timezone.utc)
    local_now = datetime.now()
    parts = []
    parts.append(f"# Context Briefing")
    # [:19] trims the ISO string to seconds precision (drops micros/offset).
    parts.append(f"Agent: {agent} | Generated: {now.isoformat()[:19]}Z | Local: {local_now.strftime('%H:%M')}")
    parts.append("")
    # State
    parts.append("## ⚡ State")
    parts.append(f"Mode: {_get_execution_mode()}")
    # Wellbeing (optional)
    if wellbeing_file:
        wb = load_json(wellbeing_file)
        if wb:
            parts.append(f"Wellbeing: {wb.get('status', '?')} ({wb.get('overall', 0):.0%}) trend:{wb.get('history_trend', '?')}")
    # Session mood — only surfaced when it deviates from "neutral".
    td = _load_threads_data(reboot_dir)
    mood = td.get("session_mood", "neutral")
    if mood != "neutral":
        emoji = {"frustrated": "😤", "excited": "🔥", "tense": "", "productive": "🔧", "exploratory": "🔬"}.get(mood, "")
        parts.append(f"Last session mood: {mood} {emoji}")
    warning = _integrity_warning(reboot_dir)
    if warning:
        parts.append(f"\n{warning}")
    # Calendar (optional external command); best-effort — failures are ignored.
    if calendar_cmd:
        try:
            result = subprocess.run(calendar_cmd, capture_output=True, text=True, timeout=10)
            if result.returncode == 0 and result.stdout.strip():
                parts.append(f"\n### 📅 Today")
                # Cap calendar output at 10 lines to protect the char budget.
                parts.append("\n".join(result.stdout.strip().split("\n")[:10]))
        except Exception:
            pass
    parts.append("")
    # Hot snapshot
    hot = _load_hot_snapshot(reboot_dir)
    if hot:
        parts.append("## 🔥 Last Session Snapshot")
        parts.append(hot)
        parts.append("")
    # Narrative
    narrative = _load_narrative(reboot_dir)
    if narrative:
        parts.append("## 📖 Narrative (last 24h)")
        parts.append(narrative)
        parts.append("")
    # Threads + knowledge
    threads = _get_open_threads(reboot_dir)
    all_facts = load_facts(facts_file) if facts_file and facts_file.exists() else []
    if threads:
        parts.append("## 🧵 Active Threads")
        for t in threads:
            prio_emoji = {"critical": "🔴", "high": "🟠", "medium": "🟡", "low": "🔵"}.get(t.get("priority"), "")
            mood_tag = f" [{t.get('mood', '')}]" if t.get("mood", "neutral") != "neutral" else ""
            # NOTE(review): t['title'] is a hard key access — threads written
            # without a title would raise KeyError here; confirm writer side.
            parts.append(f"\n### {prio_emoji} {t['title']}{mood_tag}")
            parts.append(f"Priority: {t.get('priority', '?')} | Last: {t.get('last_activity', '?')[:16]}")
            parts.append(f"Summary: {t.get('summary', 'no summary')}")
            if t.get("waiting_for"):
                parts.append(f"⏳ Waiting for: {t['waiting_for']}")
            if t.get("decisions"):
                parts.append(f"Decisions: {', '.join(t['decisions'])}")
            if all_facts:
                knowledge = _query_knowledge_for_thread(t, all_facts)
                if knowledge:
                    parts.append("Knowledge:")
                    parts.extend(knowledge)
        parts.append("")
    # Decisions
    decisions = _load_decisions(reboot_dir)
    if decisions:
        parts.append("## 🎯 Recent Decisions")
        for d in decisions:
            ie = {"critical": "🔴", "high": "🟠", "medium": "🟡"}.get(d.get("impact"), "")
            parts.append(f"- {ie} **{d['what']}** ({d.get('date', '?')})")
            if d.get("why"):
                parts.append(f" Why: {d['why'][:100]}")
        parts.append("")
    # Footer
    parts.append("---")
    parts.append(f"_Boot context | {len(threads)} active threads | {len(decisions)} recent decisions_")
    result = "\n".join(parts)
    # Hard cap: truncate rather than fail when the budget is exceeded.
    if len(result) > max_chars:
        result = result[:max_chars] + "\n\n_[truncated to token budget]_"
    return result
def run(dry_run: bool = False, max_tokens: int = 4000, workspace: Path = None,
        facts_file: Path = None, calendar_cmd: list[str] = None,
        wellbeing_file: Path = None):
    """Assemble boot context and either print it (dry run) or write BOOTSTRAP.md."""
    target_ws = workspace or get_workspace_dir()
    content = assemble(
        workspace=target_ws,
        max_chars=max_tokens * 4,  # ~4 chars per token heuristic
        facts_file=facts_file,
        calendar_cmd=calendar_cmd,
        wellbeing_file=wellbeing_file,
    )
    size = len(content)
    if dry_run:
        print(content)
        print(f"\n--- Stats: {size} chars, ~{size//4} tokens ---")
        return
    (target_ws / "BOOTSTRAP.md").write_text(content)
    print(f"✅ BOOTSTRAP.md written ({size} chars, ~{size//4} tokens)")
def main():
    """CLI entry point for standalone invocation."""
    import argparse
    parser = argparse.ArgumentParser(description="Boot Assembler — Query-driven boot context")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--max-tokens", type=int, default=4000)
    parser.add_argument("--workspace", type=str, help="Workspace directory")
    parser.add_argument("--facts-file", type=str, help="Path to facts.jsonl")
    opts = parser.parse_args()
    run(
        dry_run=opts.dry_run,
        max_tokens=opts.max_tokens,
        workspace=Path(opts.workspace) if opts.workspace else None,
        facts_file=Path(opts.facts_file) if opts.facts_file else None,
    )


if __name__ == "__main__":
    main()