# NOTE(review): the following header is web-paste residue (CI status and file
# stats were dropped); the commit message is preserved as a comment so the
# module stays importable.
#
# Commit: New cortex/memory/ module that provides:
#   - boot_assembler: builds BOOTSTRAP.md from threads, decisions, narrative
#   - thread_tracker: tracks conversation threads across sessions via NATS
#   - narrative_generator: daily narrative with Ollama LLM (fallback: structured)
#   - pre_compaction: snapshot pipeline before context compaction
# CLI commands:
#   - cortex memory bootstrap [--dry-run] [--workspace DIR]
#   - cortex memory snapshot [--workspace DIR]
#   - cortex memory threads [--summary] [--hours N]
# All paths configurable via WORKSPACE_DIR, NATS_URL, AGENT_NAME env vars.
# No hardcoded paths. Works with any OpenClaw agent.
# Fixes array/dict handling for empty threads.json and decisions.json.
"""Boot Assembler — Query-driven boot context generator.
|
|
|
|
Reads threads, decisions, narrative, and knowledge to assemble a dense BOOTSTRAP.md.
|
|
All paths derived from WORKSPACE_DIR. No hardcoded paths.
|
|
"""

import json
import os
import subprocess
from datetime import datetime, timezone, timedelta
from pathlib import Path

from .common import (
    get_workspace_dir, get_reboot_dir, get_agent_name,
    load_json, load_facts,
)

DEFAULT_MAX_CHARS = 16000  # ~4000 tokens

def _load_threads_data(reboot_dir: Path) -> dict:
|
|
data = load_json(reboot_dir / "threads.json")
|
|
if isinstance(data, list):
|
|
return {"threads": data}
|
|
return data if isinstance(data, dict) else {}
|
|
|
|
|
|
def _get_open_threads(reboot_dir: Path, limit: int = 7) -> list[dict]:
    """Return up to `limit` open threads, by priority then recency.

    Args:
        reboot_dir: Directory containing threads.json.
        limit: Maximum number of threads to return.

    Returns:
        Threads with status == "open", sorted critical-first and, within a
        priority, most-recent `last_activity` first.
    """
    data = _load_threads_data(reboot_dir)
    open_threads = [t for t in data.get("threads", []) if t.get("status") == "open"]
    priority_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}

    def sort_key(t: dict) -> tuple:
        # A malformed or non-string last_activity must not crash boot
        # assembly; fall back to 0.0 so the thread sorts as oldest within
        # its priority bucket.
        try:
            ts = datetime.fromisoformat(
                t.get("last_activity", "2000-01-01T00:00:00Z").replace("Z", "+00:00")
            ).timestamp()
        except (TypeError, ValueError):
            ts = 0.0
        return (priority_order.get(t.get("priority", "low"), 3), -ts)

    open_threads.sort(key=sort_key)
    return open_threads[:limit]
def _integrity_warning(reboot_dir: Path) -> str:
    """Build a staleness warning for the thread-tracker data, or '' if fresh."""
    integrity = _load_threads_data(reboot_dir).get("integrity", {})
    stamp = integrity.get("last_nats_timestamp")
    if not stamp:
        return "⚠️ No integrity data — thread tracker may not have run yet."
    try:
        # Date-only stamps (YYYY-MM-DD, length <= 10) are taken as midnight UTC;
        # full stamps may carry a trailing "Z" that fromisoformat can't parse.
        if len(stamp) <= 10:
            seen = datetime.strptime(stamp, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        else:
            seen = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
        age_min = (datetime.now(timezone.utc) - seen).total_seconds() / 60
        if age_min > 480:
            return f"🚨 STALE DATA: Thread data is {age_min/60:.0f}h old."
        elif age_min > 120:
            return f"⚠️ Data staleness: Thread data is {age_min/60:.0f}h old."
        return ""
    except Exception:
        return "⚠️ Could not parse integrity timestamp."
def _load_hot_snapshot(reboot_dir: Path) -> str:
|
|
f = reboot_dir / "hot-snapshot.md"
|
|
try:
|
|
if not f.exists():
|
|
return ""
|
|
mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc)
|
|
if datetime.now(timezone.utc) - mtime > timedelta(hours=1):
|
|
return ""
|
|
return f.read_text().strip()[:1000]
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
def _load_decisions(reboot_dir: Path) -> list[dict]:
    """Return up to 10 of the newest decisions from the last 14 days."""
    raw = load_json(reboot_dir / "decisions.json")
    if isinstance(raw, list):
        # Legacy format: file contains the decision array directly.
        raw = {"decisions": raw}
    if not isinstance(raw, dict):
        return []
    cutoff = (datetime.now(timezone.utc) - timedelta(days=14)).strftime("%Y-%m-%d")
    # Lexicographic compare works because dates are ISO YYYY-MM-DD strings.
    recent = [entry for entry in raw.get("decisions", []) if entry.get("date", "") >= cutoff]
    return recent[-10:]
def _load_narrative(reboot_dir: Path) -> str:
|
|
f = reboot_dir / "narrative.md"
|
|
try:
|
|
if not f.exists():
|
|
return ""
|
|
mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc)
|
|
if datetime.now(timezone.utc) - mtime > timedelta(hours=36):
|
|
return ""
|
|
return f.read_text().strip()[:2000]
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
def _query_knowledge_for_thread(thread: dict, facts: list[dict]) -> list[str]:
|
|
"""Score facts by keyword overlap with thread."""
|
|
results = []
|
|
query_terms = thread.get("title", "") + " " + thread.get("summary", "")
|
|
query_words = set(query_terms.lower().split())
|
|
|
|
scored = []
|
|
for fact in facts:
|
|
text = fact.get("text", "").lower()
|
|
priority = fact.get("priority", "normal")
|
|
boost = {"critical": 4, "high": 2, "normal": 1, "low": 0.5}.get(priority, 1)
|
|
overlap = len(query_words & set(text.split()))
|
|
if overlap > 0:
|
|
score = overlap * boost
|
|
created = fact.get("created", "")
|
|
if created and created[:10] >= (datetime.now(timezone.utc) - timedelta(days=7)).strftime("%Y-%m-%d"):
|
|
score *= 1.5
|
|
scored.append((score, fact))
|
|
|
|
scored.sort(key=lambda x: -x[0])
|
|
for score, fact in scored[:3]:
|
|
prio = fact.get("priority", "?")
|
|
conf = fact.get("confidence", 1.0)
|
|
text = fact.get("text", "")[:150]
|
|
results.append(f" [{prio}|{conf:.0%}] {text}")
|
|
|
|
return results[:5]
|
|
|
|
|
|
def _get_execution_mode() -> str:
|
|
hour = datetime.now().hour
|
|
if 6 <= hour < 12:
|
|
return "Morning — brief, directive, efficient"
|
|
elif 12 <= hour < 18:
|
|
return "Afternoon — execution mode"
|
|
elif 18 <= hour < 22:
|
|
return "Evening — strategic, philosophical possible"
|
|
return "Night — emergencies only"
|
|
|
|
|
|
def assemble(workspace: Path = None, max_chars: int = DEFAULT_MAX_CHARS,
             facts_file: Path = None, calendar_cmd: list[str] = None,
             wellbeing_file: Path = None) -> str:
    """Assemble BOOTSTRAP.md content.

    Args:
        workspace: Workspace directory (default: WORKSPACE_DIR or cwd)
        max_chars: Character budget
        facts_file: Path to facts.jsonl for knowledge queries
        calendar_cmd: Command to run for calendar events (optional)
        wellbeing_file: Path to wellbeing.json (optional)

    Returns:
        The briefing as a single markdown string, hard-truncated to max_chars.
    """
    ws = workspace or get_workspace_dir()
    reboot_dir = get_reboot_dir(ws)
    agent = get_agent_name()
    now = datetime.now(timezone.utc)
    local_now = datetime.now()  # local wall clock, used for display only
    parts = []

    # Header
    parts.append(f"# Context Briefing")
    parts.append(f"Agent: {agent} | Generated: {now.isoformat()[:19]}Z | Local: {local_now.strftime('%H:%M')}")
    parts.append("")

    # State
    parts.append("## ⚡ State")
    parts.append(f"Mode: {_get_execution_mode()}")

    # Wellbeing (optional)
    if wellbeing_file:
        wb = load_json(wellbeing_file)
        if wb:
            parts.append(f"Wellbeing: {wb.get('status', '?')} ({wb.get('overall', 0):.0%}) trend:{wb.get('history_trend', '?')}")

    # Session mood
    td = _load_threads_data(reboot_dir)
    mood = td.get("session_mood", "neutral")
    if mood != "neutral":
        emoji = {"frustrated": "😤", "excited": "🔥", "tense": "⚡", "productive": "🔧", "exploratory": "🔬"}.get(mood, "")
        parts.append(f"Last session mood: {mood} {emoji}")

    # Data-freshness warning (empty string when data is fresh)
    warning = _integrity_warning(reboot_dir)
    if warning:
        parts.append(f"\n{warning}")

    # Calendar (optional external command)
    if calendar_cmd:
        try:
            result = subprocess.run(calendar_cmd, capture_output=True, text=True, timeout=10)
            if result.returncode == 0 and result.stdout.strip():
                parts.append(f"\n### 📅 Today")
                # Cap at 10 events to protect the character budget.
                parts.append("\n".join(result.stdout.strip().split("\n")[:10]))
        except Exception:
            # Calendar is best-effort; a failing command must not block boot.
            pass
    parts.append("")

    # Hot snapshot (only if written within the last hour)
    hot = _load_hot_snapshot(reboot_dir)
    if hot:
        parts.append("## 🔥 Last Session Snapshot")
        parts.append(hot)
        parts.append("")

    # Narrative (only if fresher than 36h)
    narrative = _load_narrative(reboot_dir)
    if narrative:
        parts.append("## 📖 Narrative (last 24h)")
        parts.append(narrative)
        parts.append("")

    # Threads + knowledge
    threads = _get_open_threads(reboot_dir)
    all_facts = load_facts(facts_file) if facts_file and facts_file.exists() else []

    if threads:
        parts.append("## 🧵 Active Threads")
        for t in threads:
            prio_emoji = {"critical": "🔴", "high": "🟠", "medium": "🟡", "low": "🔵"}.get(t.get("priority"), "⚪")
            mood_tag = f" [{t.get('mood', '')}]" if t.get("mood", "neutral") != "neutral" else ""
            # Leading \n separates consecutive thread subsections.
            parts.append(f"\n### {prio_emoji} {t['title']}{mood_tag}")
            parts.append(f"Priority: {t.get('priority', '?')} | Last: {t.get('last_activity', '?')[:16]}")
            parts.append(f"Summary: {t.get('summary', 'no summary')}")
            if t.get("waiting_for"):
                parts.append(f"⏳ Waiting for: {t['waiting_for']}")
            if t.get("decisions"):
                parts.append(f"Decisions: {', '.join(t['decisions'])}")
            if all_facts:
                # Attach the top knowledge facts relevant to this thread.
                knowledge = _query_knowledge_for_thread(t, all_facts)
                if knowledge:
                    parts.append("Knowledge:")
                    parts.extend(knowledge)
        parts.append("")

    # Decisions
    decisions = _load_decisions(reboot_dir)
    if decisions:
        parts.append("## 🎯 Recent Decisions")
        for d in decisions:
            ie = {"critical": "🔴", "high": "🟠", "medium": "🟡"}.get(d.get("impact"), "⚪")
            parts.append(f"- {ie} **{d['what']}** ({d.get('date', '?')})")
            if d.get("why"):
                parts.append(f" Why: {d['why'][:100]}")
        parts.append("")

    # Footer
    parts.append("---")
    parts.append(f"_Boot context | {len(threads)} active threads | {len(decisions)} recent decisions_")

    result = "\n".join(parts)
    if len(result) > max_chars:
        result = result[:max_chars] + "\n\n_[truncated to token budget]_"
    return result
def run(dry_run: bool = False, max_tokens: int = 4000, workspace: Path = None,
        facts_file: Path = None, calendar_cmd: list[str] = None,
        wellbeing_file: Path = None):
    """Run boot assembler."""
    ws = workspace or get_workspace_dir()
    output_file = ws / "BOOTSTRAP.md"

    bootstrap = assemble(
        workspace=ws,
        max_chars=max_tokens * 4,  # rough 4-chars-per-token budget
        facts_file=facts_file,
        calendar_cmd=calendar_cmd,
        wellbeing_file=wellbeing_file,
    )

    size = len(bootstrap)
    if dry_run:
        # Preview mode: print instead of writing the file.
        print(bootstrap)
        print(f"\n--- Stats: {size} chars, ~{size//4} tokens ---")
        return
    output_file.write_text(bootstrap)
    print(f"✅ BOOTSTRAP.md written ({size} chars, ~{size//4} tokens)")
def main():
    """CLI entry point: parse arguments and run the boot assembler."""
    import argparse

    parser = argparse.ArgumentParser(description="Boot Assembler — Query-driven boot context")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--max-tokens", type=int, default=4000)
    parser.add_argument("--workspace", type=str, help="Workspace directory")
    parser.add_argument("--facts-file", type=str, help="Path to facts.jsonl")
    opts = parser.parse_args()

    run(
        dry_run=opts.dry_run,
        max_tokens=opts.max_tokens,
        workspace=Path(opts.workspace) if opts.workspace else None,
        facts_file=Path(opts.facts_file) if opts.facts_file else None,
    )
if __name__ == "__main__":
|
|
main()
|