darkplex-core/cortex/memory/boot_assembler.py
Claudia 0484c6321a
Some checks failed
Tests / test (push) Failing after 5s
feat(memory): add session memory persistence module
New cortex/memory/ module that provides:
- boot_assembler: builds BOOTSTRAP.md from threads, decisions, narrative
- thread_tracker: tracks conversation threads across sessions via NATS
- narrative_generator: daily narrative with Ollama LLM (fallback: structured)
- pre_compaction: snapshot pipeline before context compaction

CLI commands:
- cortex memory bootstrap [--dry-run] [--workspace DIR]
- cortex memory snapshot [--workspace DIR]
- cortex memory threads [--summary] [--hours N]

All paths configurable via WORKSPACE_DIR, NATS_URL, AGENT_NAME env vars.
No hardcoded paths. Works with any OpenClaw agent.

Fixes array/dict handling for empty threads.json and decisions.json.
2026-02-13 11:52:25 +01:00

290 lines
10 KiB
Python

"""Boot Assembler — Query-driven boot context generator.
Reads threads, decisions, narrative, and knowledge to assemble a dense BOOTSTRAP.md.
All paths derived from WORKSPACE_DIR. No hardcoded paths.
"""
import json
import os
import subprocess
from datetime import datetime, timezone, timedelta
from pathlib import Path
from .common import (
get_workspace_dir, get_reboot_dir, get_agent_name,
load_json, load_facts,
)
DEFAULT_MAX_CHARS = 16000 # ~4000 tokens
def _load_threads_data(reboot_dir: Path) -> dict:
    """Load threads.json, normalizing a bare list into ``{"threads": [...]}``.

    Returns an empty dict when the file is missing or has an unexpected shape.
    """
    raw = load_json(reboot_dir / "threads.json")
    if isinstance(raw, dict):
        return raw
    if isinstance(raw, list):
        # Legacy format: a bare JSON array of thread objects.
        return {"threads": raw}
    return {}
def _get_open_threads(reboot_dir: Path, limit: int = 7) -> list[dict]:
    """Return up to ``limit`` open threads, ordered by priority then recency.

    Threads with an unknown priority sort as "low".  Fix: a thread whose
    ``last_activity`` is None or malformed no longer raises from
    ``datetime.fromisoformat`` (which would abort the entire boot assembly);
    it now sorts as oldest instead.
    """
    priority_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}

    def _activity_ts(thread: dict) -> float:
        # Timestamps are written as ISO-8601 with a trailing "Z"; tolerate
        # missing or garbled values so one bad entry cannot break boot.
        raw = thread.get("last_activity") or "2000-01-01T00:00:00Z"
        try:
            return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
        except (ValueError, AttributeError, TypeError):
            return 0.0

    data = _load_threads_data(reboot_dir)
    open_threads = [t for t in data.get("threads", []) if t.get("status") == "open"]
    open_threads.sort(key=lambda t: (
        priority_rank.get(t.get("priority", "low"), 3),
        -_activity_ts(t),
    ))
    return open_threads[:limit]
def _integrity_warning(reboot_dir: Path) -> str:
    """Return a staleness warning for the thread data, or "" when it is fresh."""
    integrity = _load_threads_data(reboot_dir).get("integrity", {})
    stamp = integrity.get("last_nats_timestamp")
    if not stamp:
        return "⚠️ No integrity data — thread tracker may not have run yet."
    try:
        # Date-only stamps (YYYY-MM-DD) are interpreted as midnight UTC.
        if len(stamp) <= 10:
            parsed = datetime.strptime(stamp, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        else:
            parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
        age_hours = (datetime.now(timezone.utc) - parsed).total_seconds() / 3600
    except Exception:
        return "⚠️ Could not parse integrity timestamp."
    # Thresholds: >8h is alarming, >2h is a mild heads-up.
    if age_hours > 8:
        return f"🚨 STALE DATA: Thread data is {age_hours:.0f}h old."
    if age_hours > 2:
        return f"⚠️ Data staleness: Thread data is {age_hours:.0f}h old."
    return ""
def _load_hot_snapshot(reboot_dir: Path) -> str:
f = reboot_dir / "hot-snapshot.md"
try:
if not f.exists():
return ""
mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc)
if datetime.now(timezone.utc) - mtime > timedelta(hours=1):
return ""
return f.read_text().strip()[:1000]
except Exception:
return ""
def _load_decisions(reboot_dir: Path) -> list[dict]:
    """Return the last 10 decisions dated within the past 14 days."""
    raw = load_json(reboot_dir / "decisions.json")
    if isinstance(raw, list):
        # Legacy format: a bare JSON array of decision objects.
        raw = {"decisions": raw}
    elif not isinstance(raw, dict):
        return []
    cutoff_date = (datetime.now(timezone.utc) - timedelta(days=14)).strftime("%Y-%m-%d")
    # Dates are ISO YYYY-MM-DD strings, so lexicographic compare == chronological.
    fresh = [d for d in raw.get("decisions", []) if d.get("date", "") >= cutoff_date]
    return fresh[-10:]
def _load_narrative(reboot_dir: Path) -> str:
f = reboot_dir / "narrative.md"
try:
if not f.exists():
return ""
mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc)
if datetime.now(timezone.utc) - mtime > timedelta(hours=36):
return ""
return f.read_text().strip()[:2000]
except Exception:
return ""
def _query_knowledge_for_thread(thread: dict, facts: list[dict]) -> list[str]:
"""Score facts by keyword overlap with thread."""
results = []
query_terms = thread.get("title", "") + " " + thread.get("summary", "")
query_words = set(query_terms.lower().split())
scored = []
for fact in facts:
text = fact.get("text", "").lower()
priority = fact.get("priority", "normal")
boost = {"critical": 4, "high": 2, "normal": 1, "low": 0.5}.get(priority, 1)
overlap = len(query_words & set(text.split()))
if overlap > 0:
score = overlap * boost
created = fact.get("created", "")
if created and created[:10] >= (datetime.now(timezone.utc) - timedelta(days=7)).strftime("%Y-%m-%d"):
score *= 1.5
scored.append((score, fact))
scored.sort(key=lambda x: -x[0])
for score, fact in scored[:3]:
prio = fact.get("priority", "?")
conf = fact.get("confidence", 1.0)
text = fact.get("text", "")[:150]
results.append(f" [{prio}|{conf:.0%}] {text}")
return results[:5]
def _get_execution_mode() -> str:
hour = datetime.now().hour
if 6 <= hour < 12:
return "Morning — brief, directive, efficient"
elif 12 <= hour < 18:
return "Afternoon — execution mode"
elif 18 <= hour < 22:
return "Evening — strategic, philosophical possible"
return "Night — emergencies only"
def assemble(workspace: Path = None, max_chars: int = DEFAULT_MAX_CHARS,
             facts_file: Path = None, calendar_cmd: list[str] = None,
             wellbeing_file: Path = None) -> str:
    """Assemble BOOTSTRAP.md content.

    Sections are appended in a fixed order: header, state (mode/wellbeing/
    mood/integrity/calendar), hot snapshot, narrative, active threads with
    related knowledge, recent decisions, footer.  The result is truncated
    to ``max_chars`` at the end.

    Args:
        workspace: Workspace directory (default: WORKSPACE_DIR or cwd)
        max_chars: Character budget
        facts_file: Path to facts.jsonl for knowledge queries
        calendar_cmd: Command to run for calendar events (optional)
        wellbeing_file: Path to wellbeing.json (optional)

    Returns:
        The complete Markdown briefing as one string.
    """
    ws = workspace or get_workspace_dir()
    reboot_dir = get_reboot_dir(ws)
    agent = get_agent_name()
    now = datetime.now(timezone.utc)
    local_now = datetime.now()
    parts = []
    parts.append(f"# Context Briefing")
    # [:19] trims the ISO string to seconds precision (drops micros/offset).
    parts.append(f"Agent: {agent} | Generated: {now.isoformat()[:19]}Z | Local: {local_now.strftime('%H:%M')}")
    parts.append("")
    # State
    parts.append("## ⚡ State")
    parts.append(f"Mode: {_get_execution_mode()}")
    # Wellbeing (optional)
    if wellbeing_file:
        wb = load_json(wellbeing_file)
        if wb:
            parts.append(f"Wellbeing: {wb.get('status', '?')} ({wb.get('overall', 0):.0%}) trend:{wb.get('history_trend', '?')}")
    # Session mood — only surfaced when it deviates from "neutral".
    td = _load_threads_data(reboot_dir)
    mood = td.get("session_mood", "neutral")
    if mood != "neutral":
        emoji = {"frustrated": "😤", "excited": "🔥", "tense": "", "productive": "🔧", "exploratory": "🔬"}.get(mood, "")
        parts.append(f"Last session mood: {mood} {emoji}")
    warning = _integrity_warning(reboot_dir)
    if warning:
        parts.append(f"\n{warning}")
    # Calendar (optional external command); best-effort — failures are ignored.
    if calendar_cmd:
        try:
            result = subprocess.run(calendar_cmd, capture_output=True, text=True, timeout=10)
            if result.returncode == 0 and result.stdout.strip():
                parts.append(f"\n### 📅 Today")
                # Cap calendar output at 10 lines to protect the char budget.
                parts.append("\n".join(result.stdout.strip().split("\n")[:10]))
        except Exception:
            pass
    parts.append("")
    # Hot snapshot
    hot = _load_hot_snapshot(reboot_dir)
    if hot:
        parts.append("## 🔥 Last Session Snapshot")
        parts.append(hot)
        parts.append("")
    # Narrative
    narrative = _load_narrative(reboot_dir)
    if narrative:
        parts.append("## 📖 Narrative (last 24h)")
        parts.append(narrative)
        parts.append("")
    # Threads + knowledge
    threads = _get_open_threads(reboot_dir)
    all_facts = load_facts(facts_file) if facts_file and facts_file.exists() else []
    if threads:
        parts.append("## 🧵 Active Threads")
        for t in threads:
            prio_emoji = {"critical": "🔴", "high": "🟠", "medium": "🟡", "low": "🔵"}.get(t.get("priority"), "")
            mood_tag = f" [{t.get('mood', '')}]" if t.get("mood", "neutral") != "neutral" else ""
            # NOTE(review): t['title'] is a hard key access — threads written
            # without a title would raise KeyError here; confirm writer side.
            parts.append(f"\n### {prio_emoji} {t['title']}{mood_tag}")
            parts.append(f"Priority: {t.get('priority', '?')} | Last: {t.get('last_activity', '?')[:16]}")
            parts.append(f"Summary: {t.get('summary', 'no summary')}")
            if t.get("waiting_for"):
                parts.append(f"⏳ Waiting for: {t['waiting_for']}")
            if t.get("decisions"):
                parts.append(f"Decisions: {', '.join(t['decisions'])}")
            if all_facts:
                knowledge = _query_knowledge_for_thread(t, all_facts)
                if knowledge:
                    parts.append("Knowledge:")
                    parts.extend(knowledge)
        parts.append("")
    # Decisions
    decisions = _load_decisions(reboot_dir)
    if decisions:
        parts.append("## 🎯 Recent Decisions")
        for d in decisions:
            ie = {"critical": "🔴", "high": "🟠", "medium": "🟡"}.get(d.get("impact"), "")
            parts.append(f"- {ie} **{d['what']}** ({d.get('date', '?')})")
            if d.get("why"):
                parts.append(f" Why: {d['why'][:100]}")
        parts.append("")
    # Footer
    parts.append("---")
    parts.append(f"_Boot context | {len(threads)} active threads | {len(decisions)} recent decisions_")
    result = "\n".join(parts)
    # Hard cap: truncate rather than fail when the budget is exceeded.
    if len(result) > max_chars:
        result = result[:max_chars] + "\n\n_[truncated to token budget]_"
    return result
def run(dry_run: bool = False, max_tokens: int = 4000, workspace: Path = None,
        facts_file: Path = None, calendar_cmd: list[str] = None,
        wellbeing_file: Path = None):
    """Assemble boot context and either print it (dry run) or write BOOTSTRAP.md."""
    target_ws = workspace or get_workspace_dir()
    content = assemble(
        workspace=target_ws,
        max_chars=max_tokens * 4,  # ~4 chars per token heuristic
        facts_file=facts_file,
        calendar_cmd=calendar_cmd,
        wellbeing_file=wellbeing_file,
    )
    size = len(content)
    if dry_run:
        print(content)
        print(f"\n--- Stats: {size} chars, ~{size//4} tokens ---")
        return
    (target_ws / "BOOTSTRAP.md").write_text(content)
    print(f"✅ BOOTSTRAP.md written ({size} chars, ~{size//4} tokens)")
def main():
    """CLI entry point for standalone invocation."""
    import argparse
    parser = argparse.ArgumentParser(description="Boot Assembler — Query-driven boot context")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--max-tokens", type=int, default=4000)
    parser.add_argument("--workspace", type=str, help="Workspace directory")
    parser.add_argument("--facts-file", type=str, help="Path to facts.jsonl")
    opts = parser.parse_args()
    run(
        dry_run=opts.dry_run,
        max_tokens=opts.max_tokens,
        workspace=Path(opts.workspace) if opts.workspace else None,
        facts_file=Path(opts.facts_file) if opts.facts_file else None,
    )


if __name__ == "__main__":
    main()