Modules: triage, health_scanner, feedback_loop, memory_hygiene,
roadmap, validate_output, enhanced_search, auto_handoff
+ composite_scorer, intent_classifier
CLI: 'cortex <module> <command>' unified entry point
Tests: 157/169 passing (12 assertion mismatches from rename)
Docker: python:3.11-slim based
363 lines
13 KiB
Python
363 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""Sub-Agent Output Validator — post-completion validation of sub-agent work.
|
|
|
|
Validates that spawned tasks actually produced what was requested:
|
|
files created, non-empty, syntax-valid, tests passed, errors addressed.
|
|
|
|
Usage:
|
|
python3 validate_output.py --transcript /path/to/session.jsonl --task "description"
|
|
python3 validate_output.py --session-key "agent:main:subagent:xxx"
|
|
"""
|
|
|
|
import argparse
|
|
import ast
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
DEFAULT_SESSIONS_DIR = Path(os.path.expanduser("~/.openclaw/agents/main/sessions"))
|
|
|
|
|
|
def parse_jsonl(path: Path) -> list[dict]:
    """Read a JSONL file, returning each parseable line as a dict.

    Blank lines and lines that fail to decode as JSON are skipped silently,
    so a partially corrupted transcript still yields its valid entries.
    """
    records: list[dict] = []
    with open(path) as handle:
        for raw in handle:
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                records.append(json.loads(stripped))
            except json.JSONDecodeError:
                continue
    return records
|
|
|
|
|
|
def find_session_by_key(session_key: str) -> Path | None:
|
|
"""Find a session JSONL by its session key (searches session headers)."""
|
|
sessions_dir = DEFAULT_SESSIONS_DIR
|
|
if not sessions_dir.exists():
|
|
return None
|
|
# session key format: agent:main:subagent:UUID — extract UUID
|
|
parts = session_key.split(":")
|
|
uuid_part = parts[-1] if parts else session_key
|
|
candidate = sessions_dir / f"{uuid_part}.jsonl"
|
|
if candidate.exists():
|
|
return candidate
|
|
# Search all files
|
|
for f in sessions_dir.glob("*.jsonl"):
|
|
try:
|
|
with open(f) as fh:
|
|
first_line = fh.readline().strip()
|
|
if first_line:
|
|
header = json.loads(first_line)
|
|
if header.get("id") == uuid_part or session_key in json.dumps(header):
|
|
return f
|
|
except (json.JSONDecodeError, OSError):
|
|
pass
|
|
return None
|
|
|
|
|
|
def extract_mentioned_files(task: str) -> list[str]:
|
|
"""Extract file paths/names mentioned in a task description."""
|
|
patterns = [
|
|
r'`([^`]+\.\w{1,5})`', # `filename.ext`
|
|
r'(\S+\.(?:py|ts|js|json|md|yaml|yml|toml|sh))', # bare filenames
|
|
]
|
|
files = set()
|
|
for pat in patterns:
|
|
for m in re.finditer(pat, task):
|
|
f = m.group(1)
|
|
if "/" not in f and not f.startswith("."):
|
|
files.add(f)
|
|
elif f.startswith("~/") or f.startswith("./") or f.startswith("/"):
|
|
files.add(f)
|
|
return list(files)
|
|
|
|
|
|
def _decode_tool_args(item: dict) -> dict:
    """Return a toolCall item's arguments as a dict, decoding JSON strings.

    Malformed JSON or a non-dict payload yields {} rather than raising.
    """
    args = item.get("arguments", {})
    if isinstance(args, str):
        try:
            args = json.loads(args)
        except json.JSONDecodeError:
            args = {}
    return args if isinstance(args, dict) else {}


def extract_created_files_from_transcript(entries: list[dict]) -> list[str]:
    """Extract files that were written/created during the session.

    Scans each entry's message content for write/edit tool calls (taking the
    ``file_path`` or ``path`` argument) and for exec tool calls whose shell
    command creates a file via ``tee`` or ``>`` redirection.

    Note: the original implementation scanned the same content list twice
    (once for write/edit, once for exec) with duplicated argument-decoding
    code; this version does one pass with identical results.

    Returns:
        De-duplicated list of file paths (order unspecified).
    """
    files: set[str] = set()
    for entry in entries:
        content = entry.get("message", {}).get("content", [])
        if not isinstance(content, list):
            continue
        for item in content:
            if not isinstance(item, dict) or item.get("type") != "toolCall":
                continue
            name = item.get("name", "")
            args = _decode_tool_args(item)
            if name in ("write", "Write", "edit", "Edit"):
                fp = args.get("file_path") or args.get("path", "")
                if fp:
                    files.add(fp)
            elif name in ("exec", "Exec"):
                cmd = args.get("command", "")
                # Detect shell file creation: `... | tee out` or `... > out`.
                for m in re.finditer(r'(?:tee|>)\s+(\S+)', cmd):
                    files.add(m.group(1))
    return list(files)
|
|
|
|
|
|
def extract_transcript_errors(entries: list[dict]) -> list[str]:
    """Extract unaddressed error messages from transcript.

    Collects the first 200 characters of every message flagged ``isError``;
    list-shaped content contributes each dict item's ``text`` field.
    """
    snippets: list[str] = []
    for entry in entries:
        message = entry.get("message", {})
        if not message.get("isError"):
            continue
        content = message.get("content", [])
        if isinstance(content, list):
            snippets.extend(
                item.get("text", "unknown error")[:200]
                for item in content
                if isinstance(item, dict)
            )
        elif isinstance(content, str):
            snippets.append(content[:200])
    return snippets
|
|
|
|
|
|
def check_test_results(entries: list[dict]) -> dict:
    """Scan transcript for test execution results.

    Recognizes pytest summary lines ("N passed", optionally with "failed")
    and unittest markers (trailing "OK", or "FAILED"). Later matches
    overwrite earlier ones, so the last test output in the transcript wins.
    """
    result = {"tests_found": False, "tests_passed": None, "details": ""}
    for entry in entries:
        content = entry.get("message", {}).get("content", [])
        if not isinstance(content, list):
            continue
        for item in content:
            text = item.get("text", "") if isinstance(item, dict) else ""
            if not text:
                continue
            # pytest summary, e.g. "3 passed, 1 failed in 0.2s"
            if re.search(r'\d+ passed', text):
                result["tests_found"] = True
                result["tests_passed"] = "failed" not in text.lower()
                result["details"] = text[:200]
            # unittest success marker: a trailing "OK"
            if re.search(r'OK\s*$', text.strip()):
                result["tests_found"] = True
                result["tests_passed"] = True
            # unittest failure marker (uppercase only)
            if re.search(r'FAILED', text):
                result["tests_found"] = True
                result["tests_passed"] = False
                result["details"] = text[:200]
    return result
|
|
|
|
|
|
class ValidationReport:
    """Accumulates named check results and renders them as JSON or text."""

    def __init__(self):
        # Each check is {"name": ..., "status": "pass"|"fail"|"warn", "detail": ...}
        self.checks: list[dict] = []
        self.passed = 0
        self.failed = 0
        self.warnings = 0

    def add(self, name: str, status: str, detail: str = ""):
        """Record one check. status: pass, fail, warn (unknown counts as warn)."""
        self.checks.append({"name": name, "status": status, "detail": detail})
        if status == "pass":
            self.passed += 1
        elif status == "fail":
            self.failed += 1
        else:
            self.warnings += 1

    @property
    def ok(self) -> bool:
        """True when no check failed; warnings do not block."""
        return not self.failed

    def to_json(self) -> dict:
        """Return a JSON-serializable summary of all checks."""
        return {
            "passed": self.passed,
            "failed": self.failed,
            "warnings": self.warnings,
            "ok": self.ok,
            "checks": self.checks,
        }

    def to_human(self) -> str:
        """Render a multi-line human-readable report with status icons."""
        verdict = "✅ PASS" if self.ok else "❌ FAIL"
        lines = [
            f"Validation Report: {verdict}",
            f" {self.passed} passed, {self.failed} failed, {self.warnings} warnings",
            "",
        ]
        icons = {"pass": "✅", "fail": "❌", "warn": "⚠️"}
        for check in self.checks:
            row = f" {icons.get(check['status'], '?')} {check['name']}"
            if check["detail"]:
                row += f" — {check['detail']}"
            lines.append(row)
        return "\n".join(lines)
|
|
|
|
|
|
def validate_file_exists(path: str) -> tuple[bool, str]:
    """Check if file exists and is non-empty.

    Args:
        path: File path; ``~`` is expanded.

    Returns:
        (ok, detail) — ok is True only for an existing, non-empty file.
    """
    p = Path(os.path.expanduser(path))
    if not p.exists():
        return False, f"File not found: {path}"
    # Stat once and reuse the size (the original stat'd the file twice).
    size = p.stat().st_size
    if size == 0:
        return False, f"File is empty: {path}"
    return True, f"Exists, {size} bytes"
|
|
|
|
|
|
def validate_python_syntax(path: str) -> tuple[bool, str]:
    """Check Python file for syntax errors.

    Compiles the file's source without executing it.

    Returns:
        (ok, detail) — detail describes the failure when ok is False.
    """
    p = Path(os.path.expanduser(path))
    if not p.exists():
        return False, "File not found"
    try:
        source = p.read_text()
        compile(source, str(p), "exec")
        return True, "Syntax OK"
    except SyntaxError as e:
        return False, f"Syntax error: {e}"
    except ValueError as e:
        # compile() raises ValueError (not SyntaxError) for source
        # containing null bytes; the original crashed on that input.
        return False, f"Syntax error: {e}"
    except UnicodeDecodeError as e:
        # read_text() on a non-UTF-8 file raised out of the original too.
        return False, f"Not valid text: {e}"
|
|
|
|
|
|
def validate_typescript_structure(path: str) -> tuple[bool, str]:
    """Basic TypeScript validation — check for exports, no excessive 'any'.

    Heuristic only: flags more than 10 ``any`` occurrences and the absence
    of any ``export`` keyword; does not parse the file.
    """
    p = Path(os.path.expanduser(path))
    if not p.exists():
        return False, "File not found"
    source = p.read_text()
    problems = []
    any_hits = len(re.findall(r'\bany\b', source))
    if any_hits > 10:
        problems.append(f"Excessive 'any' usage ({any_hits} occurrences)")
    if re.search(r'\bexport\b', source) is None:
        problems.append("No exports found")
    return (False, "; ".join(problems)) if problems else (True, "Structure OK")
|
|
|
|
|
|
def validate_staging_location(files: list[str], staging_dir: str = "staging") -> list[tuple[str, bool, str]]:
    """Check if files are in the staging directory.

    A file "is in staging" when *staging_dir* appears anywhere in its
    ``~``-expanded path (substring match, not a path-component check).

    Returns:
        One (original_path, in_staging, note) tuple per input file.
    """
    results: list[tuple[str, bool, str]] = []
    for original in files:
        hit = staging_dir in os.path.expanduser(original)
        note = "In staging" if hit else "Not in staging dir"
        results.append((original, hit, note))
    return results
|
|
|
|
|
|
def run_validation(transcript_path: Path, task: str) -> ValidationReport:
    """Main validation logic.

    Runs the full check suite against a session transcript:
    transcript readability, existence of files mentioned in the task or
    created in the session, Python/TypeScript syntax, staging location
    (when the task mentions "staging"), test results (when the task
    mentions tests), and unaddressed errors in the transcript.

    Args:
        transcript_path: Path to the session JSONL file.
        task: Original task description text.

    Returns:
        A populated ValidationReport.
    """
    report = ValidationReport()
    entries = parse_jsonl(transcript_path)

    if not entries:
        report.add("Transcript readable", "fail", "No entries found")
        return report
    report.add("Transcript readable", "pass", f"{len(entries)} entries")

    # Union of files named in the task and files written during the session.
    mentioned = extract_mentioned_files(task)
    created = extract_created_files_from_transcript(entries)
    all_files = list(set(mentioned + created))

    for f in all_files:
        exists, detail = validate_file_exists(f)
        report.add(f"File exists: {Path(f).name}", "pass" if exists else "fail", detail)

    # Syntax check Python files
    for f in all_files:
        if f.endswith(".py"):
            ok, detail = validate_python_syntax(f)
            report.add(f"Python syntax: {Path(f).name}", "pass" if ok else "fail", detail)

    # Structure check TypeScript files
    for f in all_files:
        if f.endswith((".ts", ".tsx")):
            ok, detail = validate_typescript_structure(f)
            report.add(f"TS structure: {Path(f).name}", "pass" if ok else "fail", detail)

    # Staging check only when the task asks for it. Delegates to
    # validate_staging_location instead of duplicating its substring
    # logic inline (the original re-implemented the same check here).
    if "staging" in task.lower():
        for f, in_staging, note in validate_staging_location(all_files):
            report.add(f"In staging: {Path(f).name}",
                       "pass" if in_staging else "warn",
                       note)

    # Check test results if tests were requested
    if any(w in task.lower() for w in ["test", "tests", "pytest", "unittest"]):
        test_results = check_test_results(entries)
        if test_results["tests_found"]:
            status = "pass" if test_results["tests_passed"] else "fail"
            report.add("Tests passed", status, test_results["details"][:100])
        else:
            report.add("Tests executed", "warn", "No test output found in transcript")

    # Check for unaddressed errors
    errors = extract_transcript_errors(entries)
    if errors:
        report.add("Unaddressed errors", "warn", f"{len(errors)} error(s) in transcript")
    else:
        report.add("No unaddressed errors", "pass")

    return report
|
|
|
|
|
|
def main():
    """CLI entry point: resolve the transcript path, validate, print, exit.

    Exit status 0 when all checks pass, 1 on failure or a missing
    transcript/session.
    """
    parser = argparse.ArgumentParser(description="Sub-Agent Output Validator")
    parser.add_argument("--transcript", help="Path to session JSONL file")
    parser.add_argument("--session-key", help="Session key (e.g. agent:main:subagent:xxx)")
    parser.add_argument("--task", default="", help="Original task description")
    parser.add_argument("--json", action="store_true", help="Output JSON only")
    args = parser.parse_args()

    if not (args.transcript or args.session_key):
        parser.error("Either --transcript or --session-key is required")

    # --session-key takes precedence when both are supplied.
    if args.session_key:
        path = find_session_by_key(args.session_key)
        if not path:
            print(f"Session not found: {args.session_key}", file=sys.stderr)
            sys.exit(1)
    else:
        path = Path(args.transcript)
        if not path.exists():
            print(f"File not found: {args.transcript}", file=sys.stderr)
            sys.exit(1)

    report = run_validation(path, args.task)

    payload = json.dumps(report.to_json(), indent=2)
    if args.json:
        print(payload)
    else:
        print(report.to_human())
        print()
        print(payload)

    sys.exit(0 if report.ok else 1)


if __name__ == "__main__":
    main()
|