- Fixed auto_handoff: added default markers (DECISION/TODO/BLOCKED/QUESTION)
- Fixed enhanced_search: internal imports use `cortex.` prefix
- Fixed intent_classifier test: stronger WHEN query for date_tokens test
- Fixed test imports: all use `cortex.module_name`
- Fixed triage test: accept MINIMAL classification for very low priority
- Fixed typo: `self.self.assertIn` → `self.assertIn`
251 lines
7.6 KiB
Python
Executable file
251 lines
7.6 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Auto-Handoff Generator — Extract structured session handoffs from daily notes
|
|
or session transcripts.
|
|
|
|
Two modes:
|
|
1. LLM-based extraction (via OpenAI API) — comprehensive
|
|
2. Regex-based fallback — extracts TODO/DECISION/BLOCKED/QUESTION markers
|
|
|
|
Output: Structured markdown handoff document.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Default config location: config.json alongside this script.
CONFIG_PATH = Path(__file__).parent / "config.json"
|
|
|
|
|
|
def load_config(path: Optional[Path] = None) -> dict:
    """Read the handoff configuration file.

    Args:
        path: Explicit config file location; defaults to CONFIG_PATH.

    Returns:
        The "auto_handoff" section of the config as a dict, or {} when
        the file is missing or has no such section.
    """
    config_file = path or CONFIG_PATH
    if not config_file.exists():
        return {}
    with open(config_file) as fh:
        data = json.load(fh)
    return data.get("auto_handoff", {})
|
|
|
|
|
|
def extract_markers(content: str, config: Optional[dict] = None) -> dict:
    """Regex-based extraction of structured markers from content.

    Scans for inline markers such as TODO:, DECISION:, BLOCKED: and
    QUESTION: (configurable via the config "markers" key), plus
    unchecked markdown checkboxes (collected as todos) and #/##/###
    headings (collected as key context).

    Args:
        content: Raw text of a daily note or session transcript.
        config: Optional config dict; falls back to load_config().

    Returns:
        Dict with lists under "decisions", "todos", "blocked",
        "questions" and "key_context".
    """
    cfg = config or load_config()
    # Marker type -> prefixes that introduce it; config may override the
    # defaults (which include German variants).
    markers_cfg = cfg.get("markers", {
        "decision": ["DECISION:", "DECIDED:", "ENTSCHEIDUNG:"],
        "todo": ["TODO:", "FIXME:", "HACK:", "XXX:"],
        "blocked": ["BLOCKED:", "WAITING:", "BLOCKIERT:"],
        "question": ["QUESTION:", "FRAGE:", "ASK:"],
    })

    results = {
        "decisions": [],
        "todos": [],
        "blocked": [],
        "questions": [],
    }

    # Marker type (singular) -> result bucket name.
    category_map = {
        "decision": "decisions",
        "todo": "todos",
        "blocked": "blocked",
        "question": "questions",
    }

    lines = content.split("\n")
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue

        for marker_type, marker_list in markers_cfg.items():
            for marker in marker_list:
                if marker in stripped:
                    # Keep only the text after the marker (dropping any
                    # stray extra colon).
                    idx = stripped.index(marker) + len(marker)
                    text = stripped[idx:].strip().lstrip(":").strip()
                    if text:
                        category = category_map.get(marker_type, marker_type)
                        if category in results:
                            results[category].append(text)
                    # One match per marker type is enough for this line.
                    break

    # Also extract unchecked checkbox items ("- [ ] item") as todos.
    # NOTE: the previously compiled-but-unused "done_re" pattern for
    # checked boxes has been removed (dead code).
    checkbox_re = re.compile(r'^[-*]\s*\[\s*\]\s*(.+)')
    for line in lines:
        m = checkbox_re.match(line.strip())
        if m and m.group(1) not in results["todos"]:
            results["todos"].append(m.group(1))

    # Headings (levels 1-3) carry key context for the next session.
    results["key_context"] = []
    heading_re = re.compile(r'^#{1,3}\s+(.+)')
    for line in lines:
        m = heading_re.match(line.strip())
        if m:
            results["key_context"].append(m.group(1))

    return results
|
|
|
|
|
|
def format_handoff(extracted: dict, title: str = "Session Handoff",
                   source: str = "") -> str:
    """Render extracted handoff data as a markdown document.

    Args:
        extracted: Dict with optional list keys "decisions", "todos",
            "blocked", "questions", "key_context" and an optional
            string key "summary". Empty/missing sections are omitted.
        title: Top-level document heading.
        source: Optional source attribution shown under the title.

    Returns:
        Structured markdown handoff document as a single string.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    out = [f"# {title}", f"*Generated: {timestamp}*"]
    if source:
        out.append(f"*Source: {source}*")
    out.append("")

    # (heading, extracted key, bullet prefix) for each list-valued section,
    # in the order they appear in the document.
    sections = [
        ("## ✅ Decisions Made", "decisions", "- "),
        ("## 📋 Next Steps / TODOs", "todos", "- [ ] "),
        ("## ⚠️ Blocked / Waiting", "blocked", "- "),
        ("## ❓ Open Questions", "questions", "- "),
        ("## 📌 Key Context", "key_context", "- "),
    ]
    for heading, key, bullet in sections:
        items = extracted.get(key)
        if items:
            out.append(heading)
            out.extend(f"{bullet}{item}" for item in items)
            out.append("")

    # Summary is a single text block, not a bullet list.
    if extracted.get("summary"):
        out.append("## 📝 Summary")
        out.append(extracted["summary"])
        out.append("")

    return "\n".join(out)
|
|
|
|
|
|
def generate_handoff_llm(content: str, config: Optional[dict] = None) -> dict:
    """Extract structured handoff data via the OpenAI chat API.

    Requires the OPENAI_API_KEY environment variable; without it, or on
    any API/parse failure, this degrades to regex-based extract_markers().

    Args:
        content: Session transcript or daily note text (truncated to the
            first 8000 characters before being sent).
        config: Optional config dict; falls back to load_config().
            Honors "llm_model" and "llm_max_tokens" keys.

    Returns:
        Dict of extracted handoff fields parsed from the model's JSON
        response (or the extract_markers() result on fallback).
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        return extract_markers(content, config)

    cfg = config or load_config()
    model = cfg.get("llm_model", "gpt-4o-mini")
    max_tokens = cfg.get("llm_max_tokens", 1500)

    try:
        import urllib.request

        prompt = f"""Extract structured information from this session log/daily note.
Return JSON with these fields:
- "decisions": list of decisions made
- "todos": list of action items / next steps
- "blocked": list of blocked items or things waiting on someone
- "questions": list of open questions
- "summary": brief 2-3 sentence summary of the session
- "key_context": list of important context points for the next session

Content:
{content[:8000]}"""

        payload = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            # Forces the model to emit a valid JSON object.
            "response_format": {"type": "json_object"},
        }

        request = urllib.request.Request(
            "https://api.openai.com/v1/chat/completions",
            data=json.dumps(payload).encode(),
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
        )

        with urllib.request.urlopen(request, timeout=30) as resp:
            reply = json.loads(resp.read())
        message = reply["choices"][0]["message"]["content"]
        return json.loads(message)

    except Exception as e:
        print(f"LLM extraction failed ({e}), falling back to regex", file=sys.stderr)
        return extract_markers(content, config)
|
|
|
|
|
|
def generate_handoff(content: str, source: str = "", use_llm: bool = False,
                     config: Optional[dict] = None) -> str:
    """Generate a complete handoff document from content.

    Args:
        content: Session transcript or daily note content.
        source: Source file path for attribution.
        use_llm: Whether to use LLM extraction (requires OPENAI_API_KEY).
        config: Optional config dict.

    Returns:
        Formatted markdown handoff document.
    """
    # Pick the extractor up front, then run the common pipeline.
    extractor = generate_handoff_llm if use_llm else extract_markers
    extracted = extractor(content, config)
    return format_handoff(extracted, source=source)
|
|
|
|
|
|
def main():
    """Console-script entry point; delegates all work to _run()."""
    _run()
|
|
|
|
|
|
def _run():
    """Parse CLI arguments, generate the handoff, and emit the result.

    Reads the input file given on the command line (or stdin when none
    is given) and writes the handoff to --output or stdout.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Generate session handoff document")
    parser.add_argument("file", nargs="?", help="Input file (daily note or transcript)")
    parser.add_argument("--llm", action="store_true", help="Use LLM for extraction")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    args = parser.parse_args()

    # Read from the given file, or fall back to stdin for pipeline use.
    if args.file:
        source, content = args.file, Path(args.file).read_text()
    else:
        source, content = "stdin", sys.stdin.read()

    handoff = generate_handoff(content, source=source, use_llm=args.llm)

    if args.output:
        Path(args.output).write_text(handoff)
        print(f"Handoff written to {args.output}")
    else:
        print(handoff)
|
|
|
|
|
|
# Allow direct execution as a script.
if __name__ == "__main__":
    main()
|