fix: format specifier crash when stream_info is None
All checks were successful
Tests / test (push) Successful in 3s
All checks were successful
Tests / test (push) Successful in 3s
This commit is contained in:
parent
cbd3556a09
commit
0123ec7090
7 changed files with 2865 additions and 0 deletions
|
|
@ -12,6 +12,10 @@ Usage:
|
|||
cortex validate --transcript <path> --task "description"
|
||||
cortex search "query" [--memory-dir ~/.cortex/memory]
|
||||
cortex handoff --from <session> --to <agent> --task "description"
|
||||
cortex learn --since 24h [--nats url] [--jsonl file]
|
||||
cortex context [--events 2000] [--output file]
|
||||
cortex track scan|list|done|check
|
||||
cortex sentinel scan|matches|report|stats
|
||||
cortex version
|
||||
"""
|
||||
|
||||
|
|
@ -71,6 +75,22 @@ def main():
|
|||
from cortex.auto_handoff import main as handoff_main
|
||||
handoff_main()
|
||||
|
||||
elif cmd == "learn":
|
||||
from cortex.learn import main as learn_main
|
||||
learn_main()
|
||||
|
||||
elif cmd == "context":
|
||||
from cortex.context import main as context_main
|
||||
context_main()
|
||||
|
||||
elif cmd == "track":
|
||||
from cortex.tracker import main as tracker_main
|
||||
tracker_main()
|
||||
|
||||
elif cmd == "sentinel":
|
||||
from cortex.sentinel import main as sentinel_main
|
||||
sentinel_main()
|
||||
|
||||
elif cmd in ("-h", "--help", "help"):
|
||||
print(__doc__.strip())
|
||||
|
||||
|
|
|
|||
433
cortex/context.py
Normal file
433
cortex/context.py
Normal file
|
|
@ -0,0 +1,433 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Cortex Context — Generate LEARNING_CONTEXT.md from NATS events.
|
||||
|
||||
Ported from ~/clawd/scripts/context-generator.mjs
|
||||
|
||||
Usage:
|
||||
cortex context [--events 2000] [--output context.md]
|
||||
cortex context --jsonl events.jsonl
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from cortex.config import cortex_home
|
||||
|
||||
|
||||
# --- Configuration ---
|
||||
|
||||
def learning_dir() -> Path:
    """Return the directory where learning output files are stored."""
    base = cortex_home()
    return base / "learning"
|
||||
|
||||
|
||||
# Topic keywords for classification.
# Maps topic label -> lowercase keywords; an event counts toward a topic
# when any keyword occurs as a substring of its lowercased text (see the
# topic-detection loop in analyze_events, which breaks after the first hit).
TOPIC_KEYWORDS = {
    'coding': ['code', 'script', 'function', 'bug', 'error', 'python', 'javascript', 'node', 'npm'],
    'mondo-gate': ['mondo', 'mygate', 'fintech', 'payment', 'crypto', 'bitcoin', 'token', 'wallet'],
    'infrastructure': ['server', 'docker', 'systemd', 'service', 'nats', 'api', 'deploy', 'port'],
    'agents': ['agent', 'mona', 'vera', 'stella', 'viola', 'sub-agent', 'spawn', 'session'],
    'security': ['security', 'audit', 'vulnerability', 'password', 'credential', 'auth', 'vault'],
    'memory': ['memory', 'context', 'event', 'learning', 'brain', 'recall', 'remember'],
    'communication': ['matrix', 'telegram', 'email', 'message', 'chat', 'voice'],
    'business': ['meeting', 'pitch', 'investor', 'series', 'valuation', 'partner', 'strategy'],
    'files': ['file', 'read', 'write', 'edit', 'create', 'delete', 'directory'],
}
|
||||
|
||||
|
||||
# --- NATS CLI Helpers ---
|
||||
|
||||
def get_nats_bin() -> str:
    """Locate the `nats` CLI binary.

    Checks a few well-known install locations first; if none exists,
    returns the bare name 'nats' so PATH resolution applies.
    """
    candidates = (
        os.path.expanduser('~/bin/nats'),
        '/usr/local/bin/nats',
        '/usr/bin/nats',
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    # None of the known locations exist; rely on PATH lookup.
    return 'nats'
|
||||
|
||||
|
||||
def get_stream_info() -> Optional[dict]:
    """Query the 'openclaw-events' stream state via the nats CLI.

    Returns a dict with 'messages', 'first_seq' and 'last_seq', or None
    when the CLI is unavailable, times out, or exits non-zero.
    """
    cmd = [get_nats_bin(), 'stream', 'info', 'openclaw-events', '--json']
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if proc.returncode == 0:
            state = json.loads(proc.stdout).get('state', {})
            return {
                'messages': state.get('messages', 0),
                'first_seq': state.get('first_seq', 1),
                'last_seq': state.get('last_seq', 0),
            }
    except Exception as e:
        print(f"❌ Error getting stream info: {e}", file=sys.stderr)
    return None
|
||||
|
||||
|
||||
def fetch_nats_events(start_seq: int, end_seq: int) -> list[dict]:
    """Fetch events from NATS by sequence range via the nats CLI.

    Each message's base64-encoded payload is decoded and merged with the
    message metadata ('seq', 'subject', 'time').  Sequences that fail to
    fetch or decode are silently skipped.

    Args:
        start_seq: First stream sequence number (inclusive).
        end_seq: Last stream sequence number (inclusive).

    Returns:
        List of event dicts in sequence order.
    """
    import base64  # hoisted out of the loop — was re-imported per iteration

    nats_bin = get_nats_bin()
    events = []

    for seq in range(start_seq, end_seq + 1):
        try:
            result = subprocess.run(
                [nats_bin, 'stream', 'get', 'openclaw-events', str(seq), '--json'],
                capture_output=True, text=True, timeout=5
            )
            if result.returncode == 0:
                msg = json.loads(result.stdout)
                # Payload arrives base64-encoded; decode into the event dict
                data_b64 = msg.get('data', '')
                data = json.loads(base64.b64decode(data_b64).decode('utf-8'))
                events.append({
                    'seq': msg.get('seq'),
                    'subject': msg.get('subject'),
                    'time': msg.get('time'),
                    **data
                })
        except Exception:
            continue

        # Progress indicator every 200 successfully fetched events
        if len(events) % 200 == 0 and len(events) > 0:
            print(f"\r Fetched {len(events)} events...", end='', file=sys.stderr)

    print(file=sys.stderr)
    return events
|
||||
|
||||
|
||||
def load_jsonl_events(source: str) -> list[dict]:
    """Load events from a JSONL file, or from stdin when *source* is '-'.

    Blank lines and lines that fail to parse as JSON are skipped.
    Returns an empty list when the file does not exist.

    Args:
        source: Path to a JSONL file, or '-' for stdin.
    """
    def _parse(lines) -> list[dict]:
        # Shared line-parsing loop for both stdin and file input.
        events = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            try:
                events.append(json.loads(line))
            except json.JSONDecodeError:
                continue
        return events

    if source == '-':
        return _parse(sys.stdin)

    path = Path(source)
    if not path.exists():
        print(f"❌ File not found: {source}", file=sys.stderr)
        return []
    # `with` guarantees the handle is closed even if parsing raises
    # (the original leaked the handle on any unexpected exception).
    with path.open() as fh:
        return _parse(fh)
|
||||
|
||||
|
||||
# --- Analysis ---
|
||||
|
||||
def extract_text(event: dict) -> str:
    """Extract text content from various event formats.

    Checks, in order: payload.text_preview (list of typed text fragments),
    payload.data.text, payload.message, then top-level data.text.
    Returns '' when none of these yields text.
    """
    payload = event.get('payload', {})

    previews = payload.get('text_preview')
    if previews:
        if isinstance(previews, list):
            fragments = [
                p.get('text', '')
                for p in previews
                if isinstance(p, dict) and p.get('type') == 'text'
            ]
            return ' '.join(fragments)

    data_text = payload.get('data', {}).get('text')
    if data_text:
        return data_text

    message = payload.get('message')
    if message:
        return message

    top_text = event.get('data', {}).get('text')
    if top_text:
        return top_text

    return ''
|
||||
|
||||
|
||||
def analyze_events(events: list[dict]) -> dict:
    """Extract usage patterns from a list of events.

    Returns a dict with language counts, topic/tool/agent frequency maps,
    a time-of-day distribution, message-type counts, the set of session
    ids, and truncated user/assistant message snippets.
    """
    import re

    # Simple language heuristics, compiled ONCE instead of per event
    # (the original rebuilt both pattern strings and imported `re` inside
    # the loop body).
    german_re = re.compile(
        r'[äöüß]|(?:^|\s)(ich|du|wir|das|ist|und|oder|nicht|ein|eine|für|auf|mit|von)(?:\s|$)',
        re.IGNORECASE,
    )
    english_re = re.compile(
        r'(?:^|\s)(the|is|are|this|that|you|we|have|has|will|would|can|could)(?:\s|$)',
        re.IGNORECASE,
    )

    patterns = {
        'languages': {'de': 0, 'en': 0},
        'topics': {},
        'tools': {},
        'time_of_day': {'morning': 0, 'afternoon': 0, 'evening': 0, 'night': 0},
        'message_types': {},
        'agents': {},
        'sessions': set(),
        'user_messages': [],
        'assistant_messages': [],
    }

    for event in events:
        text = extract_text(event)
        text_lower = text.lower()

        # Message type tracking
        msg_type = event.get('type', 'unknown')
        patterns['message_types'][msg_type] = patterns['message_types'].get(msg_type, 0) + 1

        # Agent tracking
        if event.get('agent'):
            agent = event['agent']
            patterns['agents'][agent] = patterns['agents'].get(agent, 0) + 1

        # Session tracking
        if event.get('session'):
            patterns['sessions'].add(event['session'])

        # Track user vs assistant messages (both '_' and '.' naming schemes)
        if 'message_in' in msg_type or 'message.in' in msg_type:
            if len(text) > 5:
                patterns['user_messages'].append(text[:200])
        if 'message_out' in msg_type or 'message.out' in msg_type:
            if len(text) > 20:
                snippet = text[:200]
                if snippet not in patterns['assistant_messages']:
                    patterns['assistant_messages'].append(snippet)

        # Tool usage from tool events
        if 'tool' in msg_type and event.get('payload', {}).get('data', {}).get('name'):
            tool_name = event['payload']['data']['name']
            patterns['tools'][tool_name] = patterns['tools'].get(tool_name, 0) + 1

        # Skip very short text for language/topic analysis
        if len(text) < 5:
            continue

        # Language detection (simple keyword/umlaut heuristic)
        if german_re.search(text):
            patterns['languages']['de'] += 1
        elif english_re.search(text):
            patterns['languages']['en'] += 1

        # Topic detection: each topic counted at most once per event
        for topic, keywords in TOPIC_KEYWORDS.items():
            for kw in keywords:
                if kw in text_lower:
                    patterns['topics'][topic] = patterns['topics'].get(topic, 0) + 1
                    break

        # Time of day in the machine's local timezone
        # (intended as Europe/Berlin per the original — TODO confirm host tz)
        ts = event.get('timestamp') or event.get('timestampMs')
        if ts:
            try:
                # Heuristic: values above 1e12 are epoch milliseconds
                dt = datetime.fromtimestamp(ts / 1000 if ts > 1e12 else ts)
                hour = dt.hour
                if 6 <= hour < 12:
                    patterns['time_of_day']['morning'] += 1
                elif 12 <= hour < 18:
                    patterns['time_of_day']['afternoon'] += 1
                elif 18 <= hour < 23:
                    patterns['time_of_day']['evening'] += 1
                else:
                    patterns['time_of_day']['night'] += 1
            except Exception:
                pass

    return patterns
|
||||
|
||||
|
||||
def generate_context_md(patterns: dict, event_count: int, stream_info: Optional[dict]) -> str:
    """Generate the learning-context markdown document.

    Args:
        patterns: Output of analyze_events (languages, topics, tools,
            time_of_day, agents, sessions, user_messages).
        event_count: Number of events that were analyzed.
        stream_info: Stream state dict with a 'messages' total, or None
            when events did not come from NATS (e.g. JSONL input).

    Returns:
        The rendered markdown as a single string.
    """
    total_lang = patterns['languages']['de'] + patterns['languages']['en']
    primary_lang = 'Deutsch' if patterns['languages']['de'] > patterns['languages']['en'] else 'English'

    if total_lang > 0:
        de_pct = round(patterns['languages']['de'] / total_lang * 100)
        en_pct = round(patterns['languages']['en'] / total_lang * 100)
        lang_ratio = f"{de_pct}% DE / {en_pct}% EN"
    else:
        lang_ratio = 'mixed'

    # Sort topics by frequency
    top_topics = sorted(patterns['topics'].items(), key=lambda x: -x[1])[:8]

    # Sort tools by usage
    top_tools = sorted(patterns['tools'].items(), key=lambda x: -x[1])[:8]

    # Determine activity pattern (peak time-of-day bucket)
    max_time = max(patterns['time_of_day'].items(), key=lambda x: x[1]) if patterns['time_of_day'] else ('varied', 0)

    # Recent user messages (last 5 unique), flattened to single lines
    recent_user = list(set(patterns['user_messages'][-20:]))[-5:]
    recent_user = [m.replace('\n', ' ')[:80] for m in recent_user]

    # Guard against stream_info being None (JSONL input path): fall back to
    # '?' instead of crashing on the ',' format specifier with a None value.
    stream_total = stream_info['messages'] if stream_info else None
    stream_str = f"{stream_total:,}" if isinstance(stream_total, int) else '?'

    md = f"""# Learning Context for Claudia (main agent)

*Auto-generated from {event_count} events*
*Stream total: {stream_str} events*
*Updated: {datetime.now().isoformat()}*

## User Preferences

| Preference | Value |
|------------|-------|
| **Primary Language** | {primary_lang} ({lang_ratio}) |
| **Peak Activity** | {max_time[0].capitalize()} |
| **Active Sessions** | {len(patterns['sessions'])} unique |

## Current Focus Areas (by frequency)

{chr(10).join(f"- **{t[0]}** — {t[1]} mentions" for t in top_topics) if top_topics else '- (analyzing patterns...)'}

## Most Used Tools

{chr(10).join(f"- `{t[0]}` — {t[1]}x" for t in top_tools) if top_tools else '- (no tool usage tracked)'}

## Activity Distribution

| Time | Events |
|------|--------|
| Morning (6-12) | {patterns['time_of_day']['morning']} |
| Afternoon (12-18) | {patterns['time_of_day']['afternoon']} |
| Evening (18-23) | {patterns['time_of_day']['evening']} |
| Night (23-6) | {patterns['time_of_day']['night']} |

## Agent Distribution

{chr(10).join(f"- **{a}**: {c} events" for a, c in sorted(patterns['agents'].items(), key=lambda x: -x[1])) if patterns['agents'] else '- main only'}

## Recent User Requests

{chr(10).join(f'> "{m}..."' for m in recent_user) if recent_user else '> (no recent messages captured)'}

---

## Insights for Response Generation

Based on the patterns above:
- **Language**: Respond primarily in {primary_lang}, mixing when appropriate
- **Focus**: Current work centers on {', '.join(t[0] for t in top_topics[:3]) or 'general tasks'}
- **Tools**: Frequently use {', '.join(t[0] for t in top_tools[:3]) or 'various tools'}
- **Timing**: Most active during {max_time[0]}

*This context helps maintain continuity and personalization across sessions.*
"""
    return md
|
||||
|
||||
|
||||
# --- Main ---
|
||||
|
||||
def run_context_generation(events: list[dict], output_path: Optional[Path] = None,
                           agent: str = 'main', stream_info: Optional[dict] = None) -> dict:
    """Analyze *events*, write the learning-context markdown plus a
    patterns.json debug dump, and return the JSON-serializable patterns.

    Output goes to *output_path* when given, otherwise to
    <learning_dir>/<agent>/learning-context.md.
    """
    print("📊 Context Generator", file=sys.stderr)
    print(f" Analyzing {len(events)} events...", file=sys.stderr)

    patterns = analyze_events(events)

    print(f" Topics: {len(patterns['topics'])}", file=sys.stderr)
    print(f" Tools: {len(patterns['tools'])}", file=sys.stderr)
    print(f" User messages: {len(patterns['user_messages'])}", file=sys.stderr)

    context_md = generate_context_md(patterns, len(events), stream_info)

    # Resolve the destination file
    if output_path:
        target = output_path
    else:
        agent_dir = learning_dir() / agent
        agent_dir.mkdir(parents=True, exist_ok=True)
        target = agent_dir / 'learning-context.md'

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(context_md)

    print(f"✅ Written to {target}", file=sys.stderr)

    # Persist a JSON-safe copy of the patterns for debugging
    serializable = {
        **patterns,
        'sessions': list(patterns['sessions']),  # set -> list for JSON
        'user_messages': patterns['user_messages'][-20:],
        'assistant_messages': patterns['assistant_messages'][-10:],
    }
    dump_path = target.parent / 'patterns.json'
    dump_path.write_text(json.dumps(serializable, indent=2, ensure_ascii=False))
    print(f" Patterns saved to {dump_path}", file=sys.stderr)

    return serializable
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, gather events (from NATS or a
    JSONL file), and run context generation."""
    ap = argparse.ArgumentParser(
        description='Generate LEARNING_CONTEXT.md from NATS events',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
    cortex context --events 2000
    cortex context --output ~/custom-context.md
    cortex context --jsonl events.jsonl
'''
    )
    ap.add_argument('--events', type=int, default=1000,
                    help='Number of recent events to analyze (default: 1000)')
    ap.add_argument('--output', '-o', type=Path,
                    help='Output file path (default: CORTEX_HOME/learning/<agent>/learning-context.md)')
    ap.add_argument('--agent', default='main',
                    help='Agent name for output directory')
    ap.add_argument('--jsonl', metavar='FILE',
                    help='Load events from JSONL file (use - for stdin)')
    ap.add_argument('--json', action='store_true',
                    help='Output patterns as JSON')

    args = ap.parse_args()

    stream_info = None

    if args.jsonl:
        events = load_jsonl_events(args.jsonl)
    else:
        # Fetch from NATS: determine the sequence window from stream state
        stream_info = get_stream_info()
        if stream_info is None:
            print("❌ Could not get stream info", file=sys.stderr)
            sys.exit(1)

        print(f" Stream has {stream_info['messages']:,} total events", file=sys.stderr)

        end_seq = stream_info['last_seq']
        start_seq = max(stream_info['first_seq'], end_seq - args.events + 1)

        print(f" Fetching seq {start_seq} - {end_seq}...", file=sys.stderr)
        events = fetch_nats_events(start_seq, end_seq)

    if not events:
        print("⚠️ No events found", file=sys.stderr)
        sys.exit(0)

    patterns = run_context_generation(events, args.output, args.agent, stream_info)

    if args.json:
        print(json.dumps(patterns, indent=2, ensure_ascii=False))
|
||||
528
cortex/learn.py
Normal file
528
cortex/learn.py
Normal file
|
|
@ -0,0 +1,528 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Cortex Learn — Extract preferences and behavior patterns from NATS events.
|
||||
|
||||
Ported from ~/clawd/scripts/learning-processor.mjs
|
||||
|
||||
Usage:
|
||||
cortex learn --since 24h [--nats nats://localhost:4222]
|
||||
cortex learn --jsonl input.jsonl
|
||||
cat events.jsonl | cortex learn --jsonl -
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from cortex.config import cortex_home
|
||||
|
||||
# Try to import nats (optional dependency)
|
||||
try:
|
||||
import nats
|
||||
from nats.js.api import StreamSource
|
||||
HAS_NATS = True
|
||||
except ImportError:
|
||||
HAS_NATS = False
|
||||
|
||||
|
||||
# --- Directories ---
|
||||
|
||||
def learning_dir() -> Path:
    """Path of the directory holding learning artifacts (CORTEX_HOME/learning)."""
    home = cortex_home()
    return home / "learning"
|
||||
|
||||
|
||||
# --- Constants ---
|
||||
|
||||
# German + English stopwords to filter from topics.
# All lowercase: topic candidates are compared via topic.lower().
# (Removed a duplicate 'wird' entry that appeared twice in the literal.)
STOPWORDS = {
    # German
    'nicht', 'dass', 'wenn', 'dann', 'aber', 'oder', 'und', 'der', 'die', 'das',
    'ein', 'eine', 'einer', 'ist', 'sind', 'war', 'waren', 'wird', 'werden',
    'hat', 'haben', 'kann', 'können', 'muss', 'müssen', 'soll', 'sollte',
    'würde', 'hier', 'dort', 'jetzt', 'noch', 'schon', 'auch', 'nur',
    'sehr', 'mehr', 'viel', 'ganz', 'alle', 'allem', 'allen', 'aller', 'alles',
    'heute', 'morgen', 'gestern', 'immer', 'wieder', 'also', 'dabei', 'damit',
    'danach', 'darin', 'darum', 'davon', 'dazu', 'denn', 'doch', 'durch',
    # English
    'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had',
    'her', 'was', 'one', 'our', 'out', 'has', 'have', 'been', 'would', 'could',
    'this', 'that', 'with', 'from', 'what', 'which', 'their', 'will', 'there',
}
|
||||
|
||||
# Signal patterns for preference extraction
|
||||
# Phrases/emoji indicating the user approved of a response.
# Matched case-insensitively against raw event text by extract_signals.
POSITIVE_PATTERNS = [
    r'\bmega\b', r'\bsuper\b', r'\bperfekt\b', r'\bgenau\s*so\b', r'\bja\s*!',
    r'\bcool\b', r'gut\s*gemacht', r'das\s*war\s*gut', r'gefällt\s*mir',
    r'\bklasse\b', r'\btoll\b', r'👍', r'🎉', r'💪', r'❤️', r'😘', r'🚀', r'✅', r'💯',
]

# Phrases/emoji indicating disapproval or rejection.
NEGATIVE_PATTERNS = [
    r'\bnein,\s*ich', r'nicht\s*so,', r'\bfalsch\b', r'\bstopp\b',
    r'das\s*stimmt\s*nicht', r'ich\s*meinte\s*eigentlich', r'❌', r'👎',
]

# Phrases indicating the user is correcting a misunderstanding.
CORRECTION_PATTERNS = [
    r'nein,?\s*ich\s*meinte',
    r'nicht\s*so,?\s*sondern',
    r'das\s*ist\s*falsch',
    r'eigentlich\s*wollte\s*ich',
]

# Compile all patterns once at import time for efficiency.
POSITIVE_RE = [re.compile(p, re.IGNORECASE) for p in POSITIVE_PATTERNS]
NEGATIVE_RE = [re.compile(p, re.IGNORECASE) for p in NEGATIVE_PATTERNS]
CORRECTION_RE = [re.compile(p, re.IGNORECASE) for p in CORRECTION_PATTERNS]
|
||||
|
||||
|
||||
# --- Helper Functions ---
|
||||
|
||||
def parse_duration(duration_str: str) -> timedelta:
    """Parse a duration string like '24h', '7d' or '30m' into a timedelta.

    Raises:
        ValueError: if the string is not of the form <number><h|d|m>.
    """
    m = re.match(r'^(\d+)([hdm])$', duration_str.lower())
    if m is None:
        raise ValueError(f"Invalid duration format: {duration_str}. Use like '24h', '7d', '30m'")

    amount = int(m.group(1))
    unit = m.group(2)

    # The regex restricts the unit to h/d/m, so this lookup cannot miss.
    keyword = {'h': 'hours', 'd': 'days', 'm': 'minutes'}[unit]
    return timedelta(**{keyword: amount})
|
||||
|
||||
|
||||
def load_json(path: Path, default: Any = None) -> Any:
    """Load a JSON file; return *default* ({} when default is None) if it
    does not exist.

    NOTE(review): parse errors are not caught — a corrupt file raises.
    """
    if not path.exists():
        return {} if default is None else default
    return json.loads(path.read_text())
|
||||
|
||||
|
||||
def save_json(path: Path, data: Any) -> None:
    """Write *data* as pretty-printed JSON at *path*, creating parent
    directories as needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False keeps umlauts/emoji readable in the output file
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    path.write_text(payload)
|
||||
|
||||
|
||||
def extract_text(event: dict) -> str:
    """Extract text content from various event formats.

    Order of precedence: payload.data.text, payload.content,
    payload.text_preview (joined fragments), then top-level data.text.
    Returns '' when nothing matches.
    """
    payload = event.get('payload')
    if isinstance(payload, dict):
        data = payload.get('data')
        if isinstance(data, dict) and 'text' in data:
            return data['text']
        if 'content' in payload:
            return payload['content']
        previews = payload.get('text_preview')
        if isinstance(previews, list):
            return ' '.join(
                p.get('text', '') for p in previews if isinstance(p, dict)
            )

    data = event.get('data')
    if isinstance(data, dict) and 'text' in data:
        return data['text']

    return ''
|
||||
|
||||
|
||||
# --- Signal Extraction ---
|
||||
|
||||
def extract_signals(events: list[dict]) -> dict:
    """Extract preference signals from events.

    Scans each event's text for positive/negative/correction phrase
    patterns, capitalized topic mentions, error-related skill gaps, and
    lifecycle task outcomes.

    Returns:
        Dict with keys 'positive', 'negative', 'corrections' (lists of
        signal records), 'topics' and 'skill_gaps' (frequency maps), and
        'task_outcomes' (list of success/failure records).
    """
    signals = {
        'positive': [],
        'negative': [],
        'corrections': [],
        'topics': {},  # topic -> count
        'skill_gaps': {},  # domain -> count
        'task_outcomes': [],
    }

    for event in events:
        text = extract_text(event)
        if not text:
            continue

        timestamp = event.get('timestamp', event.get('timestampMs', 0))

        # Check for positive signals (at most one record per event;
        # the first matching pattern wins)
        for pattern in POSITIVE_RE:
            if pattern.search(text):
                signals['positive'].append({
                    'timestamp': timestamp,
                    'text': text[:200],
                    'pattern': pattern.pattern,
                })
                break

        # Check for negative signals
        for pattern in NEGATIVE_RE:
            if pattern.search(text):
                signals['negative'].append({
                    'timestamp': timestamp,
                    'text': text[:200],
                    'pattern': pattern.pattern,
                })
                break

        # Check for corrections
        for pattern in CORRECTION_RE:
            if pattern.search(text):
                signals['corrections'].append({
                    'timestamp': timestamp,
                    'text': text[:200],
                })
                break

        # Extract topics: capitalized words/phrases, minus stopwords
        topic_matches = re.findall(r'[A-Z][a-zäöü]+(?:\s+[A-Z][a-zäöü]+)*', text)
        for topic in topic_matches:
            if len(topic) > 3 and topic.lower() not in STOPWORDS:
                signals['topics'][topic] = signals['topics'].get(topic, 0) + 1

        # Detect skill gaps: error mentions attributed to known tech domains
        if re.search(r'error|fehler|failed|nicht gefunden|module not found', text, re.IGNORECASE):
            domains = re.findall(r'TypeScript|NATS|Matrix|Python|Node|npm|git', text, re.IGNORECASE)
            # Fall back to an 'unknown' bucket when no known domain appears
            for domain in (domains or ['unknown']):
                d = domain.lower()
                signals['skill_gaps'][d] = signals['skill_gaps'].get(d, 0) + 1

        # Track task outcomes: lifecycle events whose phase is 'end'/'error'
        event_type = event.get('type', '')
        if 'lifecycle' in event_type:
            phase = event.get('payload', {}).get('data', {}).get('phase', '')
            if phase in ('end', 'error'):
                signals['task_outcomes'].append({
                    'timestamp': timestamp,
                    'success': phase == 'end',
                    'session': event.get('session', ''),
                })

    return signals
|
||||
|
||||
|
||||
def derive_preferences(signals: dict, existing: dict) -> dict:
    """Merge signal-derived preference data into a copy of *existing*.

    Adds lastAnalysis, signalCounts, derived flags, topTopics and the
    fixed explicit preference defaults.  *existing* itself is not rebound,
    though a pre-existing 'derived' dict is updated in place.
    """
    prefs = dict(existing)

    n_pos = len(signals['positive'])
    n_neg = len(signals['negative'])
    n_corr = len(signals['corrections'])

    prefs['lastAnalysis'] = datetime.now().isoformat()
    prefs['signalCounts'] = {
        'positive': n_pos,
        'negative': n_neg,
        'corrections': n_corr,
    }

    # Derived insight flags (only ever set, never cleared)
    derived = prefs.setdefault('derived', {})
    if n_corr > 5:
        derived['needsMoreClarification'] = True
    if n_pos > 2 * n_neg:
        derived['approachWorking'] = True

    # Ten most-mentioned topics (stable order on ties)
    ranked = sorted(signals['topics'].items(), key=lambda item: item[1], reverse=True)
    prefs['topTopics'] = [{'topic': name, 'count': cnt} for name, cnt in ranked[:10]]

    # Explicit preferences (fixed defaults)
    prefs['explicit'] = {
        'responseLength': 'concise',
        'technicalDepth': 'high',
        'humor': 'appreciated',
        'tablesInTelegram': 'never',
        'proactiveSuggestions': 'always',
        'fleiß': 'mandatory',
        'ehrgeiz': 'mandatory',
        'biss': 'mandatory',
    }

    return prefs
|
||||
|
||||
|
||||
def derive_behaviors(signals: dict, existing: dict) -> dict:
    """Update success/failure behavior patterns from recent signals.

    Appends up to the 10 most recent positive signals as success
    patterns and caps both histories at the 50 most recent entries.
    """
    result = dict(existing)
    result['lastAnalysis'] = datetime.now().isoformat()

    success = result.get('successPatterns', [])
    failure = result.get('failurePatterns', [])

    # The 10 most recent positive signals become success patterns.
    for signal in signals['positive'][-10:]:
        success.append({
            'timestamp': signal['timestamp'],
            'context': signal['text'][:100],
        })

    # Keep only the last 50 patterns of each kind.
    result['successPatterns'] = success[-50:]
    result['failurePatterns'] = failure[-50:]

    return result
|
||||
|
||||
|
||||
def generate_learning_context(prefs: dict, behaviors: dict, topics: dict, skill_gaps: dict) -> str:
    """Generate LEARNING_CONTEXT.md content.

    Args:
        prefs: Preference dict; uses 'explicit', 'topTopics', 'signalCounts'.
        behaviors: Behavior dict (accepted but not rendered currently).
        topics: Topic data (accepted but not rendered currently).
        skill_gaps: Skill-gap data; uses the 'priority' list.

    Returns:
        The markdown document as a single string.
    """
    lines = [
        '# Learned Context',
        '',
        f'*Auto-generated by Learning Processor at {datetime.now().isoformat()}*',
        '',
        '## Core Values (Fleiß, Ehrgeiz, Biss)',
        '',
        '- **Proaktiv handeln** — Nicht warten, sondern vorangehen',
        '- **Bei Fehlern:** 3 Alternativen probieren, dann nochmal 3, dann erst fragen',
        '- **Enttäuschung als Treibstoff** — Doppelter Fleiß bis es funktioniert',
        '',
        '## Learned Preferences',
        '',
    ]

    # Render explicit key/value preferences as bullets
    if prefs.get('explicit'):
        for key, value in prefs['explicit'].items():
            lines.append(f'- **{key}:** {value}')

    lines.extend(['', '## Active Topics', ''])

    # Drop stopword topics, keep the top 8
    real_topics = [t for t in prefs.get('topTopics', [])
                   if t['topic'].lower() not in STOPWORDS][:8]

    for t in real_topics:
        lines.append(f"- {t['topic']} ({t['count']} mentions)")

    # Skill-gap section only when a priority list exists
    if skill_gaps.get('priority'):
        lines.extend(['', '## Areas to Improve', ''])
        for gap in skill_gaps['priority']:
            lines.append(f"- **{gap['area']}:** {gap['frequency']} recent errors — study this!")

    lines.extend(['', '## Signal Summary', ''])
    lines.append(f"- Positive signals: {prefs.get('signalCounts', {}).get('positive', 0)}")
    lines.append(f"- Corrections needed: {prefs.get('signalCounts', {}).get('corrections', 0)}")

    return '\n'.join(lines)
|
||||
|
||||
|
||||
# --- NATS Event Fetching ---
|
||||
|
||||
async def fetch_nats_events(nats_url: str, hours: int = 24) -> list[dict]:
    """Fetch recent events from NATS JetStream.

    Reads up to the last 500 messages of the 'openclaw-events' stream and
    keeps those whose 'timestamp' (epoch ms) falls within the last *hours*.
    Returns [] when nats-py is missing or the connection/stream read fails.
    """
    if not HAS_NATS:
        print("❌ nats-py not installed. Use: pip install nats-py", file=sys.stderr)
        return []

    events = []
    # Cutoff in epoch milliseconds, matching the events' timestamp unit
    cutoff = int((datetime.now() - timedelta(hours=hours)).timestamp() * 1000)

    # Credentials come from the environment (password may be empty)
    user = os.environ.get('NATS_USER', 'claudia')
    password = os.environ.get('NATS_PASSWORD', '')

    try:
        nc = await nats.connect(nats_url, user=user, password=password)
        js = nc.jetstream()

        # Get stream info to determine which sequence window to read
        try:
            stream = await js.stream_info('openclaw-events')
            last_seq = stream.state.last_seq
            start_seq = max(1, last_seq - 500)  # Last 500 events

            # Fetch messages by sequence; skip any that fail to load/parse
            for seq in range(start_seq, last_seq + 1):
                try:
                    msg = await js.get_msg('openclaw-events', seq)
                    event = json.loads(msg.data.decode())
                    if event.get('timestamp', 0) >= cutoff:
                        events.append(event)
                except Exception:
                    continue
        except Exception as e:
            print(f"❌ Error reading stream: {e}", file=sys.stderr)

        await nc.drain()
    except Exception as e:
        print(f"❌ NATS connection failed: {e}", file=sys.stderr)

    return events
|
||||
|
||||
|
||||
def load_jsonl_events(source: str, hours: int = 24) -> list[dict]:
    """Load events from a JSONL file, or stdin when *source* is '-'.

    Keeps only events newer than *hours* ago; events with no timestamp
    (or timestamp 0) always pass the filter.  Blank and unparseable
    lines are skipped; a missing file yields [].

    Args:
        source: Path to a JSONL file, or '-' for stdin.
        hours: Age cutoff in hours (timestamps are epoch milliseconds).
    """
    cutoff = int((datetime.now() - timedelta(hours=hours)).timestamp() * 1000)

    def _parse(lines) -> list[dict]:
        # Shared parsing loop for stdin and file input.
        events = []
        for raw in lines:
            raw = raw.strip()
            if not raw:
                continue
            try:
                event = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # Filter by time when a timestamp is present (0/absent passes)
            ts = event.get('timestamp', event.get('timestampMs', 0))
            if ts == 0 or ts >= cutoff:
                events.append(event)
        return events

    if source == '-':
        return _parse(sys.stdin)

    path = Path(source)
    if not path.exists():
        print(f"❌ File not found: {source}", file=sys.stderr)
        return []
    # `with` guarantees the handle is closed even if parsing raises
    # (the original leaked the handle on any unexpected exception).
    with path.open() as fh:
        return _parse(fh)
|
||||
|
||||
|
||||
# --- Main ---
|
||||
|
||||
def run_learning_cycle(events: list[dict], agent: str = 'main') -> dict:
    """Run one learning pass over `events` and persist results for `agent`.

    Derives preferences and behaviors from the extracted signals, writes
    them (plus topic and skill-gap summaries) as JSON under the agent's
    learning directory, renders learning-context.md, and returns the
    freshly derived data structures.
    """
    out_dir = learning_dir() / agent
    out_dir.mkdir(parents=True, exist_ok=True)

    # Output artifact paths
    prefs_path = out_dir / 'preferences.json'
    behaviors_path = out_dir / 'behaviors.json'
    topics_path = out_dir / 'topics.json'
    gaps_path = out_dir / 'skill-gaps.json'
    context_path = out_dir / 'learning-context.md'

    print(f'🧠 Learning Processor', file=sys.stderr)
    print(f' Events: {len(events)}', file=sys.stderr)

    # Pull learning signals out of the raw event stream
    signals = extract_signals(events)
    print(f' Positive: {len(signals["positive"])}', file=sys.stderr)
    print(f' Negative: {len(signals["negative"])}', file=sys.stderr)
    print(f' Corrections: {len(signals["corrections"])}', file=sys.stderr)
    print(f' Topics: {len(signals["topics"])}', file=sys.stderr)

    # Merge new signals into previously stored state
    prev_prefs = load_json(prefs_path, {})
    prev_behaviors = load_json(behaviors_path, {})
    prefs = derive_preferences(signals, prev_prefs)
    behaviors = derive_behaviors(signals, prev_behaviors)

    save_json(prefs_path, prefs)
    save_json(behaviors_path, behaviors)

    topics_data = {
        'lastAnalysis': datetime.now().isoformat(),
        'topics': signals['topics'],
    }
    save_json(topics_path, topics_data)

    gap_counts = signals['skill_gaps']
    skill_gaps_data = {
        'lastAnalysis': datetime.now().isoformat(),
        'gaps': gap_counts,
        # Top-5 most frequent gap areas
        'priority': sorted(
            ({'area': area, 'frequency': freq} for area, freq in gap_counts.items()),
            key=lambda g: -g['frequency']
        )[:5],
    }
    save_json(gaps_path, skill_gaps_data)

    # Render the human-readable context document
    context_path.write_text(
        generate_learning_context(prefs, behaviors, topics_data, skill_gaps_data)
    )

    print(f'\n✅ Learning cycle complete!', file=sys.stderr)
    print(f' Output: {out_dir}', file=sys.stderr)

    # Print summary of the most useful findings
    if prefs.get('topTopics'):
        print('\n📈 Top Topics:', file=sys.stderr)
        for t in prefs['topTopics'][:5]:
            print(f" - {t['topic']}: {t['count']}", file=sys.stderr)

    if skill_gaps_data['priority']:
        print('\n⚠️ Skill Gaps:', file=sys.stderr)
        for gap in skill_gaps_data['priority']:
            print(f" - {gap['area']}: {gap['frequency']} errors", file=sys.stderr)

    return {
        'preferences': prefs,
        'behaviors': behaviors,
        'topics': topics_data,
        'skill_gaps': skill_gaps_data,
    }
|
||||
|
||||
|
||||
def main():
    """CLI entry point for `cortex learn`."""
    parser = argparse.ArgumentParser(
        description='Extract preferences and patterns from NATS events',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  cortex learn --since 24h
  cortex learn --since 7d --nats nats://localhost:4222
  cortex learn --jsonl events.jsonl
  cat events.jsonl | cortex learn --jsonl -
'''
    )
    parser.add_argument('--since', default='24h',
                        help='Time window (e.g. 24h, 7d, 30m)')
    parser.add_argument('--nats', default='nats://localhost:4222',
                        help='NATS server URL')
    parser.add_argument('--jsonl', metavar='FILE',
                        help='Load events from JSONL file (use - for stdin)')
    parser.add_argument('--agent', default='main',
                        help='Agent name for output directory')
    parser.add_argument('--json', action='store_true',
                        help='Output results as JSON')

    args = parser.parse_args()

    # Parse duration
    try:
        duration = parse_duration(args.since)
        # BUG FIX: this value was previously truncated with int() before being
        # passed on, so sub-hour windows like "30m" became 0 hours, collapsing
        # the cutoff to "now" and dropping every timestamped event. Pass the
        # fractional hour count through unchanged (timedelta accepts floats).
        hours = duration.total_seconds() / 3600
    except ValueError as e:
        print(f"❌ {e}", file=sys.stderr)
        sys.exit(1)

    # Fetch events (JSONL file/stdin, or live from NATS)
    if args.jsonl:
        events = load_jsonl_events(args.jsonl, hours)
    else:
        import asyncio
        events = asyncio.run(fetch_nats_events(args.nats, hours))

    if not events:
        print("⚠️ No events found", file=sys.stderr)
        sys.exit(0)

    # Run learning cycle
    results = run_learning_cycle(events, args.agent)

    if args.json:
        print(json.dumps(results, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
# Allow running this module directly, outside the `cortex` CLI dispatcher.
if __name__ == '__main__':
    main()
|
||||
|
|
@ -44,6 +44,30 @@ JOBS = {
|
|||
"default_interval_min": 30,
|
||||
"calendar": "*-*-* *:0/30:00",
|
||||
},
|
||||
"learn": {
|
||||
"description": "Extract preferences from NATS events",
|
||||
"command": "cortex learn --since {interval}",
|
||||
"default_interval_min": 360, # 6 hours
|
||||
"calendar": "*-*-* 0/6:00:00",
|
||||
},
|
||||
"context": {
|
||||
"description": "Generate learning context from events",
|
||||
"command": "cortex context --events 2000",
|
||||
"default_interval_min": 360, # 6 hours
|
||||
"calendar": "*-*-* 0/6:00:00",
|
||||
},
|
||||
"tracker": {
|
||||
"description": "Scan for commitments and contradictions",
|
||||
"command": "cortex track scan --since {interval}",
|
||||
"default_interval_min": 1440, # daily
|
||||
"calendar": "*-*-* 06:00:00",
|
||||
},
|
||||
"sentinel": {
|
||||
"description": "Security feed aggregation and CVE matching",
|
||||
"command": "cortex sentinel scan && cortex sentinel matches",
|
||||
"default_interval_min": 360, # 6 hours
|
||||
"calendar": "*-*-* 0/6:00:00",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
807
cortex/sentinel.py
Normal file
807
cortex/sentinel.py
Normal file
|
|
@ -0,0 +1,807 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Cortex Sentinel — Security Feed Aggregation and CVE Matching.
|
||||
|
||||
Consolidated from ~/clawd/scripts/sentinel/ (rss-fetch.py, db.py, cve-match.py, report-gen.py)
|
||||
|
||||
Features:
|
||||
- RSS security feed aggregation
|
||||
- SQLite-based deduplication
|
||||
- CVE matching against local inventory
|
||||
- Report generation (markdown + AI summary)
|
||||
|
||||
Usage:
|
||||
cortex sentinel scan [--nmap]
|
||||
cortex sentinel report [--llm]
|
||||
cortex sentinel matches
|
||||
cortex sentinel stats
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from cortex.config import cortex_home
|
||||
|
||||
# Optional dependency: feedparser does the actual RSS/Atom parsing.
# HAS_FEEDPARSER gates fetch_feed() so the rest of the CLI (stats, report,
# matches) still works when feedparser is not installed.
try:
    import feedparser
    HAS_FEEDPARSER = True
except ImportError:
    HAS_FEEDPARSER = False

# Disable SSL warnings for problematic feeds
# (some feeds are fetched with verify=False; silence the per-request warning)
try:
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
except ImportError:
    pass
|
||||
|
||||
|
||||
# --- Configuration ---
|
||||
|
||||
def _env(key: str, default: str = '') -> str:
    """Read an environment variable, returning `default` when unset."""
    value = os.environ.get(key)
    return default if value is None else value
|
||||
|
||||
|
||||
def sentinel_dir() -> Path:
    """Root directory for all sentinel state (DB, feeds, reports)."""
    root = cortex_home() / "sentinel"
    return root
|
||||
|
||||
|
||||
def sentinel_db_path() -> Path:
    """Location of the sentinel SQLite database file."""
    db_file = sentinel_dir() / "sentinel.db"
    return db_file
|
||||
|
||||
|
||||
def feeds_dir() -> Path:
    """Directory for feed output files; created on first access."""
    target = sentinel_dir() / "feeds"
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def reports_dir() -> Path:
    """Directory for generated reports; created on first access."""
    target = sentinel_dir() / "reports"
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def llm_url() -> str:
    """LLM endpoint used for AI summaries (override via CORTEX_LLM_URL)."""
    return os.environ.get('CORTEX_LLM_URL', 'http://localhost:11434/api/generate')
|
||||
|
||||
|
||||
def llm_model() -> str:
    """Model name for AI summaries (override via CORTEX_LLM_MODEL)."""
    return os.environ.get('CORTEX_LLM_MODEL', 'mistral:7b')
|
||||
|
||||
|
||||
# User agent for HTTP requests
# (browser-like UA: several security feeds block obvious bot agents)
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0"
|
||||
|
||||
|
||||
# --- Feed Configuration ---
|
||||
|
||||
# Feed-name -> fetch configuration, consumed by fetch_feed()/fetch_all_feeds():
#   url        — RSS/Atom endpoint
#   category   — grouping bucket stored with each alert
#   verify_ssl — set False for feeds with broken TLS chains (default True)
FEEDS = {
    # Security News
    "bleepingcomputer": {
        "url": "https://www.bleepingcomputer.com/feed/",
        "category": "security-news"
    },
    "hackernews": {
        "url": "https://feeds.feedburner.com/TheHackersNews",
        "category": "security-news"
    },
    "darkreading": {
        "url": "https://www.darkreading.com/rss.xml",
        "category": "security-news"
    },
    "schneier": {
        "url": "https://www.schneier.com/feed/atom/",
        "category": "security-news"
    },
    "securityweek": {
        "url": "https://www.securityweek.com/feed/",
        "category": "security-news"
    },

    # CVE/Vulnerability Feeds
    "nvd-recent": {
        "url": "https://nvd.nist.gov/feeds/xml/cve/misc/nvd-rss.xml",
        "category": "cve",
        "verify_ssl": False
    },
    "cisa-alerts": {
        "url": "https://www.cisa.gov/cybersecurity-advisories/all.xml",
        "category": "cve",
        "verify_ssl": False
    },

    # AI/ML Security
    "huggingface-blog": {
        "url": "https://huggingface.co/blog/feed.xml",
        "category": "ai-security"
    },
    "google-ai-blog": {
        "url": "https://blog.google/technology/ai/rss/",
        "category": "ai-security"
    },

    # Exploit Databases
    "exploitdb": {
        "url": "https://www.exploit-db.com/rss.xml",
        "category": "exploits",
        "verify_ssl": False
    },
}
|
||||
|
||||
|
||||
# Keywords that indicate relevance to our infrastructure
|
||||
# Matched case-insensitively against "title summary" text in fetch_feed()
# to set an alert's `relevant` flag.
RELEVANT_KEYWORDS = [
    # Tech stack
    "linux", "debian", "nginx", "traefik", "docker", "postgresql", "redis",
    "node.js", "nodejs", "python", "openssh", "git", "chromium", "openssl",
    "ollama", "llm", "whisper", "matrix", "synapse", "element",

    # Hardware
    "amd", "radeon", "rocm", "fritzbox", "avm",

    # Critical issues
    "critical", "rce", "remote code execution", "zero-day", "0-day",
    "ransomware", "supply chain", "authentication bypass",

    # AI-specific
    "prompt injection", "jailbreak", "model extraction", "adversarial",
    "llm vulnerability", "ai safety", "model poisoning"
]
|
||||
|
||||
|
||||
# Software inventory for CVE matching
|
||||
# Each item: display name, optional version string, and lowercase aliases
# that check_inventory_match() searches for inside alert text.
INVENTORY = {
    "operating_systems": [
        {"name": "Debian", "version": "12", "aliases": ["debian", "bookworm"]},
        {"name": "Linux Kernel", "version": "6.1", "aliases": ["linux", "kernel"]},
    ],
    "services": [
        {"name": "OpenSSH", "version": "9.2", "aliases": ["ssh", "openssh", "sshd"]},
        {"name": "Nginx", "version": "1.22", "aliases": ["nginx"]},
        {"name": "Traefik", "version": "2.10", "aliases": ["traefik"]},
        {"name": "Docker", "version": "24", "aliases": ["docker", "containerd"]},
        {"name": "Node.js", "version": "22", "aliases": ["node", "nodejs", "npm"]},
        {"name": "Python", "version": "3.11", "aliases": ["python", "python3"]},
        {"name": "PostgreSQL", "version": "15", "aliases": ["postgres", "postgresql"]},
        {"name": "Redis", "version": "7", "aliases": ["redis"]},
        {"name": "Ollama", "version": "0.1", "aliases": ["ollama", "llama"]},
    ],
    "applications": [
        {"name": "Chromium", "version": "120", "aliases": ["chromium", "chrome"]},
        {"name": "Git", "version": "2.39", "aliases": ["git"]},
        {"name": "OpenSSL", "version": "3.0", "aliases": ["openssl", "ssl", "tls"]},
    ],
    "hardware": [
        # Hardware entries carry no version; check_inventory_match reports None.
        {"name": "AMD Radeon RX 5700 XT", "aliases": ["amd", "radeon", "rx5700", "navi", "gfx1010"]},
        {"name": "Fritz!Box", "aliases": ["fritzbox", "fritz", "avm"]},
    ]
}
|
||||
|
||||
|
||||
# --- Database ---
|
||||
|
||||
def get_db() -> sqlite3.Connection:
    """Open the sentinel database, creating its directory on first use."""
    sentinel_dir().mkdir(parents=True, exist_ok=True)
    db = sqlite3.connect(sentinel_db_path())
    db.row_factory = sqlite3.Row  # name-based column access on result rows
    return db
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create the sentinel schema if it does not exist yet.

    Idempotent: all DDL uses IF NOT EXISTS, so it is safe to call on
    every command invocation.
    """
    conn = get_db()
    # alerts: one row per deduplicated feed item (id comes from fetch_feed's
    # md5 of "source:link"); runs: one row per scan invocation for history.
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS alerts (
            id TEXT PRIMARY KEY,
            source TEXT NOT NULL,
            category TEXT,
            title TEXT NOT NULL,
            link TEXT,
            summary TEXT,
            severity TEXT DEFAULT 'info',
            relevant INTEGER DEFAULT 0,
            first_seen TEXT NOT NULL,
            last_seen TEXT NOT NULL,
            seen_count INTEGER DEFAULT 1,
            notified INTEGER DEFAULT 0,
            acknowledged INTEGER DEFAULT 0
        );

        CREATE INDEX IF NOT EXISTS idx_alerts_source ON alerts(source);
        CREATE INDEX IF NOT EXISTS idx_alerts_severity ON alerts(severity);
        CREATE INDEX IF NOT EXISTS idx_alerts_first_seen ON alerts(first_seen);
        CREATE INDEX IF NOT EXISTS idx_alerts_notified ON alerts(notified);

        CREATE TABLE IF NOT EXISTS runs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT NOT NULL,
            total_fetched INTEGER,
            new_alerts INTEGER,
            duplicates INTEGER,
            notified INTEGER
        );
    """)
    conn.commit()
    conn.close()
|
||||
|
||||
|
||||
def add_alert(alert: dict) -> bool:
    """Insert a new alert or bump an existing one.

    Args:
        alert: Normalized alert dict from fetch_feed(); must contain "id".

    Returns:
        True when the alert id was not seen before (row inserted),
        False when an existing row was updated (last_seen/seen_count bumped).
    """
    conn = get_db()
    now = datetime.now().isoformat()

    # BUG FIX: the connection was only closed on the success paths; wrap in
    # try/finally so an exception in any execute/commit cannot leak it.
    try:
        cur = conn.execute("SELECT id, seen_count FROM alerts WHERE id = ?", (alert["id"],))
        existing = cur.fetchone()

        if existing:
            # Duplicate: just record that we saw it again.
            conn.execute("""
                UPDATE alerts SET last_seen = ?, seen_count = seen_count + 1
                WHERE id = ?
            """, (now, alert["id"]))
            conn.commit()
            return False

        conn.execute("""
            INSERT INTO alerts (id, source, category, title, link, summary,
                                severity, relevant, first_seen, last_seen)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            alert["id"],
            alert.get("source", "unknown"),
            alert.get("category", ""),
            alert.get("title", "")[:500],      # clamp oversized feed fields
            alert.get("link", ""),
            alert.get("summary", "")[:1000],
            alert.get("severity", "info"),
            1 if alert.get("relevant") else 0,
            now,
            now
        ))
        conn.commit()
        return True
    finally:
        conn.close()
|
||||
|
||||
|
||||
def log_run(total: int, new: int, dupes: int, notified: int = 0) -> None:
    """Record one scan invocation in the `runs` table."""
    db = get_db()
    db.execute("""
        INSERT INTO runs (timestamp, total_fetched, new_alerts, duplicates, notified)
        VALUES (?, ?, ?, ?, ?)
    """, (datetime.now().isoformat(), total, new, dupes, notified))
    db.commit()
    db.close()
|
||||
|
||||
|
||||
def get_stats() -> dict:
    """Collect summary statistics from the alerts and runs tables."""
    db = get_db()

    total = db.execute("SELECT COUNT(*) FROM alerts").fetchone()[0]

    severity_rows = db.execute("""
        SELECT severity, COUNT(*) as count FROM alerts
        GROUP BY severity ORDER BY count DESC
    """).fetchall()

    pending = db.execute(
        "SELECT COUNT(*) FROM alerts WHERE notified = 0 AND relevant = 1"
    ).fetchone()[0]

    # Alerts first seen within the last day (ISO timestamps sort lexically)
    yesterday = (datetime.now() - timedelta(days=1)).isoformat()
    fresh = db.execute(
        "SELECT COUNT(*) FROM alerts WHERE first_seen > ?", (yesterday,)
    ).fetchone()[0]

    runs = db.execute("SELECT * FROM runs ORDER BY timestamp DESC LIMIT 5").fetchall()

    db.close()
    return {
        "total_alerts": total,
        "by_severity": {row["severity"]: row["count"] for row in severity_rows},
        "unnotified": pending,
        "last_24h": fresh,
        "recent_runs": [dict(row) for row in runs],
    }
|
||||
|
||||
|
||||
def get_unnotified_alerts(min_severity: str = "medium") -> list[dict]:
    """Return un-notified relevant alerts at or above `min_severity`.

    At most 20 rows are considered, ordered critical-first then newest-first.
    """
    rank = {"critical": 1, "high": 2, "medium": 3, "info": 4}
    threshold = rank.get(min_severity, 3)

    db = get_db()
    rows = db.execute("""
        SELECT * FROM alerts
        WHERE notified = 0 AND relevant = 1
        ORDER BY
            CASE severity
                WHEN 'critical' THEN 1
                WHEN 'high' THEN 2
                WHEN 'medium' THEN 3
                ELSE 4
            END,
            first_seen DESC
        LIMIT 20
    """).fetchall()
    db.close()

    candidates = [dict(row) for row in rows]
    return [a for a in candidates if rank.get(a["severity"], 4) <= threshold]
|
||||
|
||||
|
||||
def get_recent_alerts(limit: int = 50) -> list[dict]:
    """Return the newest `limit` alerts, most recently seen first."""
    db = get_db()
    rows = db.execute("""
        SELECT * FROM alerts
        ORDER BY first_seen DESC
        LIMIT ?
    """, (limit,)).fetchall()
    db.close()
    return [dict(row) for row in rows]
|
||||
|
||||
|
||||
# --- Feed Fetching ---
|
||||
|
||||
def fetch_feed(name: str, config: dict) -> list[dict]:
    """Fetch one RSS/Atom feed and normalize its entries into alert dicts.

    Args:
        name: Feed key from FEEDS (used for IDs and log lines).
        config: Feed config with 'url', 'category', optional 'verify_ssl'.

    Returns:
        Up to 20 alert dicts with severity/relevance classification, or []
        on any failure (missing feedparser, network error, parse error).
    """
    if not HAS_FEEDPARSER:
        print(" ⚠️ feedparser not installed", file=sys.stderr)
        return []

    url = config["url"]
    verify_ssl = config.get("verify_ssl", True)

    try:
        headers = {"User-Agent": USER_AGENT}
        response = requests.get(url, headers=headers, timeout=15, verify=verify_ssl)
        response.raise_for_status()

        feed = feedparser.parse(response.content)

        # bozo flags parse trouble; only give up when nothing was salvaged
        if feed.bozo and not feed.entries:
            print(f" ⚠️ {name}: Parse error", file=sys.stderr)
            return []

        entries = []
        for entry in feed.entries[:20]:  # Max 20 per feed
            title = entry.get("title", "No title")
            link = entry.get("link", "")
            summary = entry.get("summary", entry.get("description", ""))[:500]
            published = entry.get("published", entry.get("updated", ""))

            # Check relevance against the infrastructure keyword list
            text_check = f"{title} {summary}".lower()
            is_relevant = any(kw in text_check for kw in RELEVANT_KEYWORDS)

            # Crude keyword-based severity heuristic: first bucket that hits
            severity = "info"
            if any(kw in text_check for kw in ["critical", "rce", "zero-day", "0-day", "ransomware"]):
                severity = "critical"
            elif any(kw in text_check for kw in ["high", "remote", "exploit", "vulnerability"]):
                severity = "high"
            elif any(kw in text_check for kw in ["medium", "moderate", "security"]):
                severity = "medium"

            entries.append({
                # Stable ID from feed name + link so re-fetches dedupe in SQLite
                "id": hashlib.md5(f"{name}:{link}".encode()).hexdigest()[:12],
                "source": name,
                "category": config["category"],
                "title": title,
                "link": link,
                "summary": summary[:300],
                "published": published,
                "severity": severity,
                "relevant": is_relevant,
                "fetched_at": datetime.now().isoformat()
            })

        print(f" ✅ {name}: {len(entries)} entries", file=sys.stderr)
        return entries

    except requests.exceptions.SSLError:
        if verify_ssl:
            # BUG FIX: retry with a copied config instead of mutating the
            # caller's dict in place (the old code wrote into `config`).
            return fetch_feed(name, {**config, "verify_ssl": False})
        return []
    except requests.exceptions.Timeout:
        print(f" ❌ {name}: Timeout", file=sys.stderr)
        return []
    except requests.exceptions.RequestException as e:
        print(f" ❌ {name}: {type(e).__name__}", file=sys.stderr)
        return []
    except Exception as e:
        print(f" ❌ {name}: {e}", file=sys.stderr)
        return []
|
||||
|
||||
|
||||
def fetch_all_feeds() -> tuple[list[dict], int, int]:
    """Fetch every configured feed.

    Returns:
        (entries, successful_count, failed_count) — a feed that yields no
        entries counts as failed.
    """
    collected: list[dict] = []
    ok = 0
    broken = 0

    for feed_name, cfg in FEEDS.items():
        # Pass a copy so per-feed retries cannot alter the shared FEEDS table.
        batch = fetch_feed(feed_name, cfg.copy())
        if batch:
            collected.extend(batch)
            ok += 1
        else:
            broken += 1

    return collected, ok, broken
|
||||
|
||||
|
||||
# --- CVE Matching ---
|
||||
|
||||
def check_inventory_match(text: str) -> list[dict]:
    """Return INVENTORY items whose aliases appear in `text`.

    Matching is case-insensitive substring search; each inventory item
    contributes at most one match (its first hitting alias).
    """
    haystack = text.lower()
    hits = []

    for category, items in INVENTORY.items():
        for item in items:
            hit_alias = next(
                (alias for alias in item.get("aliases", []) if alias in haystack),
                None,
            )
            if hit_alias is not None:
                hits.append({
                    "category": category,
                    "name": item["name"],
                    "version": item.get("version"),
                    "matched_alias": hit_alias
                })

    return hits
|
||||
|
||||
|
||||
def analyze_matches(alerts: list[dict]) -> dict:
    """Analyze alerts for inventory matches.

    Annotates each matching alert with `inventory_matches`/`match_count`
    and returns a summary dict with the top relevant/critical alerts and
    a per-category match breakdown.
    """
    relevant = []
    critical = []
    category_counts = {}

    for alert in alerts:
        text = f"{alert.get('title', '')} {alert.get('summary', '')}"
        matches = check_inventory_match(text)

        if matches:
            alert["inventory_matches"] = matches
            alert["match_count"] = len(matches)
            relevant.append(alert)

            if alert.get("severity") == "critical":
                critical.append(alert)

            for match in matches:
                cat = match["category"]
                category_counts[cat] = category_counts.get(cat, 0) + 1

    # BUG FIX: the secondary sort key was the raw severity *string*, which
    # orders alphabetically (critical < high < info < medium) and ranked
    # "info" alerts above "medium" ones. Use an explicit rank instead.
    severity_rank = {"critical": 0, "high": 1, "medium": 2, "info": 3}
    relevant.sort(key=lambda a: (-a.get("match_count", 0),
                                 severity_rank.get(a.get("severity", "info"), 3)))

    return {
        "analysis_time": datetime.now().isoformat(),
        "source_alerts": len(alerts),
        "relevant_alerts": len(relevant),
        "critical_relevant": len(critical),
        "category_breakdown": category_counts,
        "critical": critical[:10],
        "relevant": relevant[:20],
    }
|
||||
|
||||
|
||||
# --- Report Generation ---
|
||||
|
||||
def generate_report(data: dict, use_llm: bool = False) -> str:
    """Render the markdown security report.

    Args:
        data: Output of analyze_matches().
        use_llm: When True, append an AI-generated summary section.

    Returns:
        The full report as a markdown string.
    """
    now = datetime.now()

    lines = [
        "# 🔒 Security Sentinel Report",
        f"**Generated:** {now.strftime('%Y-%m-%d %H:%M')}",
        ""
    ]

    # Stats
    stats = get_stats()
    lines.extend([
        "## 📊 Database Stats",
        f"- **Total alerts:** {stats['total_alerts']}",
        f"- **Last 24h:** {stats['last_24h']}",
        f"- **Unnotified:** {stats['unnotified']}",
        ""
    ])

    # Matches
    if data.get("relevant"):
        lines.extend([
            f"## 🎯 Relevant Alerts ({data['relevant_alerts']})",
            ""
        ])

        if data.get("critical"):
            lines.append("### ⚠️ Critical")
            for alert in data["critical"][:5]:
                matches = ", ".join(m["name"] for m in alert.get("inventory_matches", []))
                lines.extend([
                    f"- **{alert['title'][:80]}**",
                    f" - Source: {alert.get('source', 'unknown')}",
                    f" - Affects: {matches}",
                    ""
                ])

        lines.append("### 📋 Other Relevant")
        for alert in data["relevant"][:10]:
            if alert in data.get("critical", []):
                continue  # already listed in the Critical section
            matches = ", ".join(m["name"] for m in alert.get("inventory_matches", []))
            lines.append(f"- {alert['title'][:60]}... ({matches})")

        lines.append("")

    # AI Summary
    if use_llm and data.get("relevant"):
        lines.extend(["## 🤖 AI Summary", ""])
        summary = get_ai_summary(data["relevant"][:10])
        lines.extend([summary, ""])

    # Actions
    lines.extend([
        "## 📝 Recommended Actions",
        ""
    ])

    # BUG FIX: the action list used hard-coded "1."/"2." prefixes, so when
    # only the second condition applied the list started at "2.". Collect the
    # applicable actions and number them sequentially instead.
    actions = []
    if data.get("critical"):
        actions.append("Review critical alerts and check for available patches")
    if stats["unnotified"] > 10:
        actions.append(f"Process {stats['unnotified']} unnotified alerts")

    if actions:
        lines.extend(f"{i}. {action}" for i, action in enumerate(actions, 1))
    else:
        lines.append("✅ No immediate actions required")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def get_ai_summary(alerts: list[dict]) -> str:
    """Ask the configured LLM for a short German summary of the alerts.

    Returns a fallback message when there are no alerts, the LLM is
    unreachable, or the HTTP call does not return 200.
    """
    if not alerts:
        return "No alerts to summarize."

    bullet_list = "\n".join(
        f"- [{a.get('severity', 'info').upper()}] {a.get('title', '')}"
        for a in alerts[:15]
    )

    prompt = f"""Du bist ein Security-Analyst. Fasse diese Security-Alerts kurz zusammen (max 5 Sätze, Deutsch).
Fokus: Was ist kritisch? Was erfordert Aktion?

Alerts:
{bullet_list}

Zusammenfassung:"""

    try:
        resp = requests.post(
            llm_url(),
            json={
                "model": llm_model(),
                "prompt": prompt,
                "stream": False,
                "options": {"temperature": 0.3, "num_predict": 300}
            },
            timeout=60
        )
        if resp.status_code == 200:
            return resp.json().get("response", "").strip()
    except Exception as e:
        return f"(LLM nicht erreichbar: {e})"

    return "(Zusammenfassung nicht verfügbar)"
|
||||
|
||||
|
||||
# --- Commands ---
|
||||
|
||||
def cmd_scan(include_nmap: bool = False) -> None:
    """Fetch all feeds, dedupe them into the DB, write alerts_latest.json.

    Note: include_nmap is accepted for CLI compatibility; the network-scan
    step is not implemented in this function.
    """
    init_db()

    print(f"🛡️ Sentinel Scan — {datetime.now().strftime('%Y-%m-%d %H:%M')}", file=sys.stderr)
    print(f" Fetching {len(FEEDS)} feeds...", file=sys.stderr)

    fetched, ok_feeds, bad_feeds = fetch_all_feeds()

    print(f"\n Feeds: {ok_feeds}/{ok_feeds+bad_feeds} OK", file=sys.stderr)

    # Deduplicate via SQLite: add_alert() returns True only for unseen IDs
    print("\n🔍 Deduplicating...", file=sys.stderr)
    fresh = []
    dupes = 0
    for item in fetched:
        if add_alert(item):
            fresh.append(item)
        else:
            dupes += 1

    log_run(len(fetched), len(fresh), dupes, 0)

    relevant_new = sum(1 for e in fresh if e.get("relevant"))
    critical_new = sum(1 for e in fresh if e.get("severity") == "critical")

    print("\n📊 Summary:", file=sys.stderr)
    print(f" Fetched: {len(fetched)}", file=sys.stderr)
    print(f" New: {len(fresh)} ({relevant_new} relevant, {critical_new} critical)", file=sys.stderr)
    print(f" Duplicates: {dupes}", file=sys.stderr)

    # Persist this run's new entries for downstream consumers
    output = {
        "fetched_at": datetime.now().isoformat(),
        "stats": {
            "total_fetched": len(fetched),
            "new_alerts": len(fresh),
            "duplicates": dupes,
            "relevant": relevant_new,
            "critical": critical_new
        },
        "entries": fresh
    }

    out_path = feeds_dir() / "alerts_latest.json"
    out_path.write_text(json.dumps(output, indent=2))
    print(f" Output: {out_path}", file=sys.stderr)
|
||||
|
||||
|
||||
def cmd_matches() -> None:
    """Print alerts touching the local inventory and save a JSON report."""
    alerts = get_recent_alerts(100)
    if not alerts:
        print("No alerts in database. Run 'cortex sentinel scan' first.")
        return

    data = analyze_matches(alerts)

    print(f"🎯 Inventory Matches ({data['relevant_alerts']} of {data['source_alerts']})\n")

    criticals = data.get("critical", [])
    if criticals:
        print("⚠️ CRITICAL:\n")
        for alert in criticals[:5]:
            affected = ", ".join(m["name"] for m in alert.get("inventory_matches", []))
            print(f" • {alert['title'][:70]}")
            print(f" Affects: {affected}\n")

    if data.get("relevant"):
        print("\n📋 Other relevant:\n")
        for alert in data["relevant"][:10]:
            if alert in criticals:
                continue  # already printed above
            affected = ", ".join(m["name"] for m in alert.get("inventory_matches", []))
            print(f" • {alert['title'][:60]}... ({affected})")

    if data.get("category_breakdown"):
        print(f"\n📊 By category: {data['category_breakdown']}")

    # Persist the full analysis for later inspection
    out_path = reports_dir() / f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    out_path.write_text(json.dumps(data, indent=2))
|
||||
|
||||
|
||||
def cmd_report(use_llm: bool = False) -> None:
    """Build a markdown report, save it, refresh 'latest', and print it."""
    data = analyze_matches(get_recent_alerts(100))
    report = generate_report(data, use_llm)

    target = reports_dir() / f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
    target.write_text(report)

    # Repoint the stable "latest" name at the new report
    # (is_symlink() also catches a dangling link, which exists() misses)
    latest = reports_dir() / "report_latest.md"
    if latest.exists() or latest.is_symlink():
        latest.unlink()
    latest.symlink_to(target.name)

    print(f"✅ Report saved: {target}", file=sys.stderr)
    print(report)
|
||||
|
||||
|
||||
def cmd_stats() -> None:
    """Print alert counts, severity breakdown, and recent run history."""
    init_db()
    stats = get_stats()

    print("📊 Sentinel Stats\n")
    print(f"Total alerts: {stats['total_alerts']}")
    print(f"Last 24h: {stats['last_24h']}")
    print(f"Unnotified: {stats['unnotified']}")

    severities = stats.get("by_severity")
    if severities:
        print("\nBy severity:")
        for sev, count in severities.items():
            print(f" {sev}: {count}")

    runs = stats.get("recent_runs")
    if runs:
        print("\nRecent runs:")
        for run in runs[:3]:
            ts = run.get("timestamp", "")[:16]
            print(f" {ts} — {run.get('new_alerts', 0)} new, {run.get('duplicates', 0)} dupes")
|
||||
|
||||
|
||||
# --- Main ---
|
||||
|
||||
def main():
    """CLI entry point for `cortex sentinel`."""
    parser = argparse.ArgumentParser(
        description='Security Feed Aggregation and CVE Matching',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Commands:
  scan      Fetch security feeds and update database
  matches   Show alerts matching local inventory
  report    Generate markdown security report
  stats     Show database statistics

Examples:
  cortex sentinel scan
  cortex sentinel matches
  cortex sentinel report --llm
'''
    )

    sub = parser.add_subparsers(dest='command')

    # scan
    scan_p = sub.add_parser('scan', help='Fetch security feeds')
    scan_p.add_argument('--nmap', action='store_true',
                        help='Include network scan (slow)')

    # matches
    sub.add_parser('matches', help='Show inventory matches')

    # report
    report_p = sub.add_parser('report', help='Generate report')
    report_p.add_argument('--llm', action='store_true',
                          help='Include AI summary')

    # stats
    sub.add_parser('stats', help='Show database stats')

    args = parser.parse_args()

    # Dispatch table instead of an if/elif chain; unknown/missing command
    # falls through to the help text.
    dispatch = {
        'scan': lambda: cmd_scan(getattr(args, 'nmap', False)),
        'matches': cmd_matches,
        'report': lambda: cmd_report(getattr(args, 'llm', False)),
        'stats': cmd_stats,
    }
    handler = dispatch.get(args.command)
    if handler is not None:
        handler()
    else:
        parser.print_help()
|
||||
|
||||
|
||||
# Allow running this module directly, outside the `cortex` CLI dispatcher.
if __name__ == '__main__':
    main()
|
||||
617
cortex/tracker.py
Normal file
617
cortex/tracker.py
Normal file
|
|
@ -0,0 +1,617 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Cortex Tracker — Commitment and Contradiction Detection.
|
||||
|
||||
Ported from ~/clawd/scripts/brain-tracker.mjs
|
||||
|
||||
Uses an LLM (configurable via CORTEX_LLM_URL) to analyze messages for:
|
||||
- Commitments/promises (someone agreeing to do something)
|
||||
- Factual claims (verifiable statements)
|
||||
- Contradictions between old and new claims
|
||||
|
||||
Usage:
|
||||
cortex track scan --since 24h
|
||||
cortex track list
|
||||
cortex track done <id>
|
||||
cortex track check "statement"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from cortex.config import cortex_home, memory_dir
|
||||
|
||||
|
||||
# --- Configuration ---
|
||||
|
||||
def _env(key: str, default: str = '') -> str:
|
||||
return os.environ.get(key, default)
|
||||
|
||||
|
||||
def llm_url() -> str:
    """LLM API endpoint (OpenAI-compatible or Ollama), set via CORTEX_LLM_URL."""
    default_endpoint = 'http://localhost:11434/api/generate'
    return _env('CORTEX_LLM_URL', default_endpoint)
|
||||
|
||||
|
||||
def llm_model() -> str:
    """Name of the LLM model to query, set via CORTEX_LLM_MODEL."""
    fallback_model = 'mistral:7b'
    return _env('CORTEX_LLM_MODEL', fallback_model)
|
||||
|
||||
|
||||
def tracker_db_path() -> Path:
    """Location of the brain-tracker.json database inside the memory dir."""
    return Path(memory_dir(), 'brain-tracker.json')
|
||||
|
||||
|
||||
def get_nats_bin() -> str:
    """Locate the `nats` CLI binary.

    Checks a few well-known install locations; falls back to the bare
    name 'nats' so normal PATH resolution still gets a chance.
    """
    candidates = (
        os.path.expanduser('~/bin/nats'),
        '/usr/local/bin/nats',
        '/usr/bin/nats',
    )
    return next((p for p in candidates if os.path.exists(p)), 'nats')
|
||||
|
||||
|
||||
# --- Database ---
|
||||
|
||||
def load_db() -> dict:
    """Load the tracker database, restoring processedIds as a set.

    Returns a fresh empty structure when no database file exists yet.
    """
    path = tracker_db_path()
    if not path.exists():
        return {
            'commitments': [],
            'claims': [],
            'contradictions': [],
            'processedIds': set(),
            'lastRun': None,
        }

    data = json.loads(path.read_text())
    # JSON has no set type, so processedIds is persisted as a list.
    data['processedIds'] = set(data.get('processedIds', []))
    return data
|
||||
|
||||
|
||||
def save_db(db: dict) -> None:
    """Persist the tracker database as pretty-printed JSON."""
    path = tracker_db_path()
    path.parent.mkdir(parents=True, exist_ok=True)

    serializable = dict(db)
    # Sets are not JSON-serializable; store processedIds as a list.
    serializable['processedIds'] = list(db['processedIds'])
    path.write_text(json.dumps(serializable, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
# --- LLM Integration ---
|
||||
|
||||
def query_llm(prompt: str, timeout: int = 25) -> str:
    """Query the LLM with a prompt.

    Supports two wire formats, selected from the configured URL:
    an Ollama ``/api/generate`` endpoint or an OpenAI-compatible
    chat-completions endpoint. Returns the model's text, or an empty
    string on any HTTP/network failure (errors are printed to stderr)
    or on a non-200 response.
    """
    url = llm_url()
    model = llm_model()

    # Detect API type from URL
    if '/api/generate' in url:
        # Ollama API
        try:
            response = requests.post(
                url,
                json={
                    'model': model,
                    'prompt': prompt,
                    'stream': False,
                    'options': {
                        'temperature': 0.3,  # low temperature for extraction-style answers
                        'num_predict': 500,  # cap response length
                    }
                },
                timeout=timeout
            )
            if response.status_code == 200:
                return response.json().get('response', '')
        except Exception as e:
            print(f"⚠️ LLM error: {e}", file=sys.stderr)
            return ''
    else:
        # OpenAI-compatible API
        try:
            response = requests.post(
                url,
                headers={
                    # Empty bearer token is tolerated by local gateways.
                    'Authorization': f"Bearer {_env('CORTEX_LLM_API_KEY', '')}",
                    'Content-Type': 'application/json',
                },
                json={
                    'model': model,
                    'messages': [{'role': 'user', 'content': prompt}],
                    'temperature': 0.3,
                    'max_tokens': 500,
                },
                timeout=timeout
            )
            if response.status_code == 200:
                return response.json()['choices'][0]['message']['content']
        except Exception as e:
            print(f"⚠️ LLM error: {e}", file=sys.stderr)
            return ''

    # Non-200 responses fall through to an empty result.
    return ''
|
||||
|
||||
|
||||
def query_ollama_cli(prompt: str, timeout: int = 25) -> str:
    """Fallback: pipe the prompt through the `ollama run` CLI.

    Returns stdout, or an empty string if the binary is missing,
    times out, or fails in any other way.
    """
    cmd = ['ollama', 'run', llm_model()]
    try:
        proc = subprocess.run(
            cmd,
            input=prompt,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except Exception:
        return ''
    return proc.stdout or ''
|
||||
|
||||
|
||||
# --- Event Fetching ---
|
||||
|
||||
def fetch_nats_events(subject: str, count: int = 100) -> list[dict]:
    """Fetch up to *count* recent events for *subject* from the NATS stream.

    Strategy: read the stream's last sequence number for the subject,
    create a throwaway pull consumer starting ``count`` messages back,
    drain it, and remove it again. Any failure (missing nats binary,
    timeouts, no messages) degrades to an empty list.
    """
    nats_bin = get_nats_bin()
    events = []

    # Get last message to find sequence
    try:
        result = subprocess.run(
            [nats_bin, 'stream', 'get', 'openclaw-events', f'--last-for={subject}'],
            capture_output=True, text=True, timeout=5
        )

        # The CLI output contains e.g. "openclaw-events#1234"; 1234 is the
        # last sequence number for this subject.
        match = re.search(r'openclaw-events#(\d+)', result.stdout or '')
        if not match:
            return []

        last_seq = int(match.group(1))
        start_seq = max(1, last_seq - count)

        # Create ephemeral consumer
        consumer_name = f'brain-{int(datetime.now().timestamp())}'

        # Best-effort removal in case a consumer with this name lingers.
        subprocess.run(
            [nats_bin, 'consumer', 'rm', 'openclaw-events', consumer_name, '--force'],
            capture_output=True, timeout=3
        )

        subprocess.run(
            [nats_bin, 'consumer', 'add', 'openclaw-events', consumer_name,
             '--pull', f'--deliver={start_seq}', '--ack=none',
             f'--filter={subject}', '--defaults'],
            capture_output=True, timeout=5
        )

        result = subprocess.run(
            [nats_bin, 'consumer', 'next', 'openclaw-events', consumer_name,
             '--count', str(count), '--raw'],
            capture_output=True, text=True, timeout=15
        )

        # Clean up the ephemeral consumer again (best effort).
        subprocess.run(
            [nats_bin, 'consumer', 'rm', 'openclaw-events', consumer_name, '--force'],
            capture_output=True, timeout=3
        )

        # --raw prints one JSON document per line; ignore anything else.
        for line in (result.stdout or '').split('\n'):
            line = line.strip()
            if not line or not line.startswith('{'):
                continue
            try:
                events.append(json.loads(line))
            except json.JSONDecodeError:
                continue

    except Exception as e:
        print(f"⚠️ NATS error: {e}", file=sys.stderr)

    return events
|
||||
|
||||
|
||||
def load_jsonl_events(source: str, hours: int = 24) -> list[dict]:
    """Load events from a JSONL file (or stdin when *source* is '-').

    Keeps events whose 'timestamp'/'timestampMs' (epoch milliseconds)
    falls within the last *hours*; events without any timestamp (ts == 0)
    are kept as well. Malformed lines are skipped silently. Returns an
    empty list when the file does not exist.
    """
    cutoff = int((datetime.now() - timedelta(hours=hours)).timestamp() * 1000)
    events: list[dict] = []

    if source == '-':
        stream = sys.stdin
        should_close = False
    else:
        path = Path(source)
        if not path.exists():
            return []
        stream = path.open()
        should_close = True

    try:
        for line in stream:
            line = line.strip()
            if not line:
                continue
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                continue
            ts = event.get('timestamp', event.get('timestampMs', 0))
            if ts == 0 or ts >= cutoff:
                events.append(event)
    finally:
        # Close the file even if iteration raises (the original leaked the
        # handle on any mid-iteration error); never close stdin.
        if should_close:
            stream.close()

    return events
|
||||
|
||||
|
||||
# --- Text Processing ---
|
||||
|
||||
def extract_text(event: dict) -> str:
    """Pull the human-readable text out of an event payload.

    Checks, in order: payload.text_preview[0].text, payload.data.text,
    payload.text. Returns '' when none is present.
    """
    payload = event.get('payload', {})

    previews = payload.get('text_preview')
    if previews and isinstance(previews[0], dict):
        return previews[0].get('text', '')

    data_text = payload.get('data', {}).get('text')
    if data_text:
        return data_text

    return payload.get('text') or ''
|
||||
|
||||
|
||||
def clean_text(text: str) -> str:
    """Strip markup and noise from a message and truncate to 250 chars."""
    substitutions = (
        (r'\[.*?\]', ''),        # bracketed metadata like [timestamps], [message_id]
        (r'\|', ' '),            # markdown table separators
        (r'[*_`#]', ''),         # markdown emphasis / heading characters
        (r'https?://\S+', ''),   # URLs
        (r'\n+', ' '),           # collapse newlines
        (r'\s+', ' '),           # collapse runs of whitespace
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()[:250]
|
||||
|
||||
|
||||
# --- Analysis ---
|
||||
|
||||
def analyze_message(text: str, existing_claims: list[dict]) -> Optional[dict]:
    """Ask the LLM whether *text* contains a commitment and/or factual claims.

    Returns the parsed JSON analysis dict, or None for short/system
    messages or when the model produced no parseable JSON.
    *existing_claims* is accepted for interface compatibility but is not
    consulted here.
    """
    clean = clean_text(text)
    # Skip trivially short messages and automated system chatter.
    if len(clean) < 20 or 'HEARTBEAT' in clean or 'cron:' in clean:
        return None

    prompt = f'''Analyze this message for:
1. Commitments/promises (someone agreeing to do something)
2. Factual claims (verifiable statements)

Message: "{clean}"

Respond with JSON only:
{{
"commitment": {{"found": false}} or {{"found": true, "what": "description", "who": "user/assistant", "deadline": "if mentioned"}},
"claims": [] or [{{"claim": "statement", "topic": "category", "entities": ["names"]}}]
}}'''

    # Prefer the HTTP API; fall back to the ollama CLI.
    response = query_llm(prompt) or query_ollama_cli(prompt)

    # Models often wrap JSON in prose — grab the outermost brace span.
    match = re.search(r'\{[\s\S]*\}', response)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return None
|
||||
|
||||
|
||||
def check_contradiction(new_claim: dict, existing_claims: list[dict]) -> Optional[dict]:
    """Ask the LLM whether *new_claim* contradicts a related prior claim.

    Claims count as related when any entity of one is a case-insensitive
    substring of an entity of the other. Only the first two related
    claims are checked, to bound LLM calls. Returns the LLM verdict
    merged with the offending original claim, or None.
    """
    new_entities = [e.lower() for e in new_claim.get('entities', [])]

    def _related(candidate: dict) -> bool:
        # Entity overlap via substring match in either direction.
        for old_e in (e.lower() for e in candidate.get('entities', [])):
            for new_e in new_entities:
                if new_e in old_e or old_e in new_e:
                    return True
        return False

    related = [c for c in existing_claims if _related(c)]
    if not related:
        return None

    for old in related[:2]:  # Check only first 2
        prompt = f'''Do these contradict?
Old ({old.get("date", "unknown")}): "{old.get("claim", "")}"
New: "{new_claim.get("claim", "")}"
Respond: {{"contradicts": true/false, "why": "explanation", "severity": "minor/major"}}'''

        response = query_llm(prompt) or query_ollama_cli(prompt)
        match = re.search(r'\{[\s\S]*?\}', response)
        if not match:
            continue
        try:
            verdict = json.loads(match.group(0))
        except json.JSONDecodeError:
            continue
        if verdict.get('contradicts'):
            return {**verdict, 'original': old}

    return None
|
||||
|
||||
|
||||
# --- Commands ---
|
||||
|
||||
def cmd_scan(hours: int = 24, jsonl: Optional[str] = None) -> None:
    """Scan recent messages for commitments, claims, and contradictions.

    Events come either from a JSONL file (*jsonl*) or from the NATS
    stream. Each unprocessed event within the last *hours* is analyzed
    via the LLM; findings are appended to the tracker database, which is
    saved incrementally so progress survives interruption.
    """
    # Hoisted out of the per-event loop: the original re-ran
    # `import random` / `import string` twice per analyzed event and
    # duplicated the id-generation expression.
    import random
    import string

    def _new_id(prefix: str) -> str:
        # Timestamp plus a short random suffix keeps IDs unique within a run.
        suffix = ''.join(random.choices(string.ascii_lowercase, k=4))
        return f"{prefix}-{int(datetime.now().timestamp())}-{suffix}"

    db = load_db()
    cutoff = int((datetime.now() - timedelta(hours=hours)).timestamp() * 1000)

    print(f"🧠 Brain Tracker — Scanning last {hours}h\n", file=sys.stderr)

    # Fetch events
    if jsonl:
        all_events = load_jsonl_events(jsonl, hours)
    else:
        print("📥 Fetching messages...", file=sys.stderr)
        in_events = fetch_nats_events('openclaw.events.main.conversation_message_in', 200)
        out_events = fetch_nats_events('openclaw.events.main.conversation_message_out', 200)
        all_events = in_events + out_events

    # Keep only unprocessed events inside the window, oldest first.
    events = [
        e for e in all_events
        if e.get('timestamp', 0) >= cutoff and e.get('id') not in db['processedIds']
    ]
    events.sort(key=lambda x: x.get('timestamp', 0))

    print(f" Found {len(events)} new events to analyze\n", file=sys.stderr)

    commitments = 0
    claims = 0
    contradictions = 0

    for i, event in enumerate(events):
        text = extract_text(event)
        clean = clean_text(text)

        if len(clean) < 20:
            db['processedIds'].add(event.get('id', ''))
            continue

        print(f"\r[{i+1}/{len(events)}] Analyzing...", end='', file=sys.stderr)

        analysis = analyze_message(text, db['claims'])

        if not analysis:
            db['processedIds'].add(event.get('id', ''))
            continue

        date = datetime.fromtimestamp(event.get('timestamp', 0) / 1000).strftime('%Y-%m-%d')

        # Process commitment
        if analysis.get('commitment', {}).get('found'):
            c = analysis['commitment']
            db['commitments'].append({
                'id': _new_id('commit'),
                'what': c.get('what', ''),
                'who': c.get('who', ''),
                'deadline': c.get('deadline'),
                'source': clean[:100],
                'date': date,
                'status': 'open',
            })
            commitments += 1
            print(f"\n✅ Commitment: \"{c.get('what', '')}\"", file=sys.stderr)

        # Process claims
        for claim in analysis.get('claims', []):
            if not claim.get('claim'):
                continue

            new_claim = {
                'id': _new_id('claim'),
                'claim': claim['claim'],
                'topic': claim.get('topic', 'general'),
                'entities': claim.get('entities', []),
                'source': clean[:100],
                'date': date,
            }

            # Check for contradictions against everything recorded so far.
            contra = check_contradiction(new_claim, db['claims'])
            if contra:
                db['contradictions'].append({
                    'id': f"contra-{int(datetime.now().timestamp())}",
                    'new_claim': new_claim,
                    'old_claim': contra['original'],
                    'why': contra.get('why', ''),
                    'severity': contra.get('severity', 'unknown'),
                    'date': date,
                    'status': 'unresolved',
                })
                contradictions += 1
                print(f"\n⚠️ CONTRADICTION: {contra.get('why', '')}", file=sys.stderr)

            db['claims'].append(new_claim)
            claims += 1

        db['processedIds'].add(event.get('id', ''))

        # Save periodically so a crash mid-scan loses at most ~10 events.
        if i % 10 == 0:
            save_db(db)

    db['lastRun'] = datetime.now().isoformat()
    save_db(db)

    print("\n\n📊 Results:", file=sys.stderr)
    print(f" New Commitments: {commitments}", file=sys.stderr)
    print(f" New Claims: {claims}", file=sys.stderr)
    print(f" Contradictions: {contradictions}", file=sys.stderr)
    print(f" Total in DB: {len(db['commitments'])} commitments, {len(db['claims'])} claims", file=sys.stderr)
|
||||
|
||||
|
||||
def cmd_list() -> None:
    """Print open commitments and unresolved contradictions to stdout."""
    db = load_db()

    pending = [c for c in db.get('commitments', []) if c.get('status') == 'open']
    conflicts = [c for c in db.get('contradictions', []) if c.get('status') == 'unresolved']

    print("🧠 Brain Tracker Status\n")

    if not pending:
        print("✅ No open commitments\n")
    else:
        print(f"📋 Open Commitments ({len(pending)}):\n")
        for item in pending:
            # Show only the id's tail — enough to pass to `cortex track done`.
            print(f" [{item['id'][-6:]}] {item.get('what', '')}")
            deadline = f" | ⏰ {item['deadline']}" if item.get('deadline') else ''
            print(f" 👤 {item.get('who', 'unknown')} | 📅 {item.get('date', '')}{deadline}\n")

    if not conflicts:
        print("✅ No contradictions\n")
    else:
        print(f"⚠️ Contradictions ({len(conflicts)}):\n")
        for item in conflicts:
            severity = item.get('severity', 'UNKNOWN').upper()
            print(f" [{severity}] {item.get('why', '')}")
            print(f" Old: \"{item.get('old_claim', {}).get('claim', '')}\"")
            print(f" New: \"{item.get('new_claim', {}).get('claim', '')}\"\n")

    print(f"📊 Total: {len(db.get('claims', []))} claims, {len(db.get('commitments', []))} commitments")
|
||||
|
||||
|
||||
def cmd_done(id_fragment: str) -> None:
    """Mark the first commitment whose id contains *id_fragment* as done."""
    db = load_db()

    target = None
    for candidate in db['commitments']:
        if id_fragment in candidate['id']:
            target = candidate
            break

    if target is None:
        print(f"❌ Not found: {id_fragment}")
        return

    target['status'] = 'done'
    target['completed'] = datetime.now().isoformat()
    save_db(db)
    print(f"✅ Marked done: {target.get('what', '')}")
|
||||
|
||||
|
||||
def cmd_check(statement: str) -> None:
    """Search stored claims for *statement* (case-insensitive) and print them."""
    db = load_db()
    needle = statement.lower()

    def _hits(claim: dict) -> bool:
        # Match in the claim text or in any associated entity name.
        if needle in claim.get('claim', '').lower():
            return True
        return any(needle in entity.lower() for entity in claim.get('entities', []))

    matches = [c for c in db.get('claims', []) if _hits(c)]

    if not matches:
        print(f"No claims matching: {statement}")
        return

    print(f"🔍 Claims matching \"{statement}\" ({len(matches)}):\n")
    # Show at most the 15 most recent matches.
    for claim in matches[-15:]:
        print(f"[{claim.get('date', '')}] {claim.get('claim', '')}")
        if claim.get('entities'):
            print(f" Entities: {', '.join(claim['entities'])}")
        print()
|
||||
|
||||
|
||||
# --- Main ---
|
||||
|
||||
def main():
    """Entry point for the `cortex track` CLI: parse arguments and dispatch."""
    parser = argparse.ArgumentParser(
        description='Commitment and Contradiction Detection',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Commands:
scan Analyze recent messages for commitments and claims
list Show open commitments and contradictions
done Mark a commitment as completed
check Search claims for a statement

Examples:
cortex track scan --since 24h
cortex track list
cortex track done commit-abc123
cortex track check "TypeDB"
'''
    )

    sub = parser.add_subparsers(dest='command')

    # scan
    scan_p = sub.add_parser('scan', help='Scan messages for commitments/claims')
    scan_p.add_argument('--since', default='24h',
                        help='Time window (e.g. 24h, 7d)')
    scan_p.add_argument('--jsonl', metavar='FILE',
                        help='Load events from JSONL file')

    # list
    sub.add_parser('list', help='Show open items')

    # done
    done_p = sub.add_parser('done', help='Mark commitment as done')
    done_p.add_argument('id', help='Commitment ID (or fragment)')

    # check
    check_p = sub.add_parser('check', help='Search claims')
    check_p.add_argument('statement', nargs='+', help='Statement to search')

    args = parser.parse_args()

    if args.command == 'scan':
        # Parse duration strings like '24h', '7d', '90m'.
        match = re.match(r'^(\d+)([hdm])$', args.since.lower())
        if not match:
            print(f"❌ Invalid duration: {args.since}", file=sys.stderr)
            sys.exit(1)

        value = int(match.group(1))
        unit = match.group(2)
        hours = value if unit == 'h' else (value * 24 if unit == 'd' else value / 60)

        # BUGFIX: do not truncate to int here. `int(hours)` floored
        # sub-hour windows ('--since 30m' -> 0 hours), which made the
        # scan cutoff "now" and silently skipped every event.
        # timedelta(hours=...) in cmd_scan accepts fractional hours.
        cmd_scan(hours, args.jsonl)

    elif args.command == 'list':
        cmd_list()

    elif args.command == 'done':
        cmd_done(args.id)

    elif args.command == 'check':
        cmd_check(' '.join(args.statement))

    else:
        parser.print_help()
|
||||
|
||||
|
||||
# Allow running this module directly (e.g. `python -m cortex.tracker`).
if __name__ == '__main__':
    main()
|
||||
436
tests/test_new_modules.py
Normal file
436
tests/test_new_modules.py
Normal file
|
|
@ -0,0 +1,436 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Tests for new cortex modules: learn, context, tracker, sentinel.
|
||||
|
||||
Run with: CORTEX_HOME=~/clawd python3 -m pytest tests/test_new_modules.py -v
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
# Set up CORTEX_HOME before importing modules
# (defaults must be in place before the cortex modules are imported —
# presumably cortex.config reads them at import/call time; setdefault
# keeps any values the caller already exported).
os.environ.setdefault('CORTEX_HOME', os.path.expanduser('~/clawd'))
os.environ.setdefault('CORTEX_MEMORY_DIR', os.path.expanduser('~/clawd/memory'))

from cortex import learn, context, tracker, sentinel
from cortex.config import cortex_home, memory_dir
|
||||
|
||||
|
||||
# --- Fixtures ---
|
||||
|
||||
@pytest.fixture
def temp_cortex_home(tmp_path):
    """Temporary CORTEX_HOME for isolated tests.

    Points CORTEX_HOME and CORTEX_MEMORY_DIR at a pytest tmp_path for
    the duration of a test and restores the previous environment on
    teardown, so tests never touch the real ~/clawd data.
    """
    # Remember the caller's values so teardown can restore them exactly.
    old_home = os.environ.get('CORTEX_HOME')
    old_memory = os.environ.get('CORTEX_MEMORY_DIR')

    os.environ['CORTEX_HOME'] = str(tmp_path)
    os.environ['CORTEX_MEMORY_DIR'] = str(tmp_path / 'memory')

    yield tmp_path

    # Teardown: restore previous values, or remove the keys we introduced.
    if old_home:
        os.environ['CORTEX_HOME'] = old_home
    else:
        os.environ.pop('CORTEX_HOME', None)
    if old_memory:
        os.environ['CORTEX_MEMORY_DIR'] = old_memory
    else:
        os.environ.pop('CORTEX_MEMORY_DIR', None)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_events():
    """Sample NATS events for testing.

    Five events spread over the last 5 hours, mixing user/assistant
    messages (including German text and a correction), a tool call, and
    an error — enough variety to exercise signal extraction, topic
    analysis, and skill-gap detection.
    """
    now = int(datetime.now().timestamp() * 1000)
    return [
        {
            'id': 'evt-001',
            'timestamp': now - 3600000,  # 1 hour ago
            'type': 'conversation_message_in',
            'payload': {
                'data': {'text': 'Das war mega! Super gemacht 👍'}
            }
        },
        {
            'id': 'evt-002',
            'timestamp': now - 7200000,  # 2 hours ago
            'type': 'conversation_message_out',
            'payload': {
                'data': {'text': 'Ich werde das TypeScript Projekt morgen fertigstellen.'}
            }
        },
        {
            'id': 'evt-003',
            'timestamp': now - 10800000,  # 3 hours ago
            'type': 'tool_call',
            'payload': {
                'data': {'name': 'Read', 'text': 'Reading file...'}
            }
        },
        {
            'id': 'evt-004',
            'timestamp': now - 14400000,  # 4 hours ago
            'type': 'conversation_message_in',
            'payload': {
                'data': {'text': 'Nein, ich meinte eigentlich Python, nicht JavaScript.'}
            }
        },
        {
            'id': 'evt-005',
            'timestamp': now - 18000000,  # 5 hours ago
            'type': 'error',
            'payload': {
                'data': {'text': 'Error: Module not found in Node.js project'}
            }
        },
    ]
|
||||
|
||||
|
||||
@pytest.fixture
def sample_alerts():
    """Sample security alerts for testing.

    One critical relevant alert, one high relevant alert, and one
    irrelevant info-level item — covering the severities and relevance
    flags that the sentinel stats/report code branches on.
    """
    return [
        {
            'id': 'alert-001',
            'source': 'bleepingcomputer',
            'category': 'security-news',
            'title': 'Critical OpenSSH vulnerability allows remote code execution',
            'link': 'https://example.com/openssh-vuln',
            'summary': 'A critical RCE vulnerability in OpenSSH...',
            'severity': 'critical',
            'relevant': True,
        },
        {
            'id': 'alert-002',
            'source': 'hackernews',
            'category': 'security-news',
            'title': 'New Docker container escape vulnerability discovered',
            'link': 'https://example.com/docker-escape',
            'summary': 'Researchers found a container escape bug in Docker...',
            'severity': 'high',
            'relevant': True,
        },
        {
            'id': 'alert-003',
            'source': 'schneier',
            'category': 'security-news',
            'title': 'Thoughts on Password Managers',
            'link': 'https://example.com/password-mgr',
            'summary': 'Discussion about password manager security...',
            'severity': 'info',
            'relevant': False,
        },
    ]
|
||||
|
||||
|
||||
# --- Learn Module Tests ---
|
||||
|
||||
class TestLearn:
    """Tests for cortex.learn module.

    Covers duration parsing, event-text extraction, signal extraction
    (positive feedback, corrections, skill gaps), preference derivation,
    and the end-to-end learning cycle including its file outputs.
    """

    def test_parse_duration_hours(self):
        """Test duration parsing for hours."""
        delta = learn.parse_duration('24h')
        assert delta == timedelta(hours=24)

    def test_parse_duration_days(self):
        """Test duration parsing for days."""
        delta = learn.parse_duration('7d')
        assert delta == timedelta(days=7)

    def test_parse_duration_minutes(self):
        """Test duration parsing for minutes."""
        delta = learn.parse_duration('30m')
        assert delta == timedelta(minutes=30)

    def test_parse_duration_invalid(self):
        """Test invalid duration format."""
        with pytest.raises(ValueError):
            learn.parse_duration('invalid')

    def test_extract_text_from_payload(self):
        """Test text extraction from various event formats."""
        event = {'payload': {'data': {'text': 'Hello world'}}}
        assert learn.extract_text(event) == 'Hello world'

    def test_extract_text_from_content(self):
        """Test text extraction from content field."""
        event = {'payload': {'content': 'Test content'}}
        assert learn.extract_text(event) == 'Test content'

    def test_extract_signals_positive(self, sample_events):
        """Test extraction of positive signals."""
        signals = learn.extract_signals(sample_events)
        assert len(signals['positive']) >= 1
        # Should detect "mega" and "Super gemacht"

    def test_extract_signals_corrections(self, sample_events):
        """Test extraction of correction signals."""
        signals = learn.extract_signals(sample_events)
        assert len(signals['corrections']) >= 1
        # Should detect "Nein, ich meinte eigentlich"

    def test_extract_signals_skill_gaps(self, sample_events):
        """Test extraction of skill gaps."""
        signals = learn.extract_signals(sample_events)
        # The error event mentions Node.js; accept either gap key spelling.
        assert 'node' in signals['skill_gaps'] or 'nodejs' in signals['skill_gaps'].keys()

    def test_derive_preferences(self, sample_events):
        """Test preference derivation."""
        signals = learn.extract_signals(sample_events)
        prefs = learn.derive_preferences(signals, {})

        assert 'lastAnalysis' in prefs
        assert 'signalCounts' in prefs
        assert 'explicit' in prefs

    def test_run_learning_cycle(self, temp_cortex_home, sample_events):
        """Test full learning cycle."""
        results = learn.run_learning_cycle(sample_events, 'test')

        assert 'preferences' in results
        assert 'behaviors' in results
        assert 'topics' in results
        assert 'skill_gaps' in results

        # Check files were created
        ldir = temp_cortex_home / 'learning' / 'test'
        assert (ldir / 'preferences.json').exists()
        assert (ldir / 'behaviors.json').exists()
        assert (ldir / 'learning-context.md').exists()
|
||||
|
||||
|
||||
# --- Context Module Tests ---
|
||||
|
||||
class TestContext:
    """Tests for cortex.context module.

    Covers text extraction, event pattern analysis (topics, tools,
    languages), markdown rendering, and the end-to-end context
    generation including its output file.
    """

    def test_extract_text(self):
        """Test text extraction from events."""
        event = {'payload': {'data': {'text': 'Test message'}}}
        text = context.extract_text(event)
        assert text == 'Test message'

    def test_analyze_events_topics(self, sample_events):
        """Test topic detection in events."""
        patterns = context.analyze_events(sample_events)
        # Should have detected topics based on keywords
        assert isinstance(patterns['topics'], dict)

    def test_analyze_events_tools(self, sample_events):
        """Test tool tracking."""
        patterns = context.analyze_events(sample_events)
        # evt-003 is a tool_call for 'Read'.
        assert 'Read' in patterns['tools']

    def test_analyze_events_languages(self, sample_events):
        """Test language detection."""
        patterns = context.analyze_events(sample_events)
        # Should detect German (more German words in samples)
        assert patterns['languages']['de'] > 0

    def test_generate_context_md(self, sample_events):
        """Test markdown generation."""
        patterns = context.analyze_events(sample_events)
        md = context.generate_context_md(patterns, len(sample_events), None)

        assert '# Learning Context' in md
        assert 'User Preferences' in md
        assert 'Most Used Tools' in md

    def test_run_context_generation(self, temp_cortex_home, sample_events):
        """Test full context generation."""
        patterns = context.run_context_generation(sample_events, agent='test')

        # Check output file
        output = temp_cortex_home / 'learning' / 'test' / 'learning-context.md'
        assert output.exists()
        content = output.read_text()
        assert '# Learning Context' in content
|
||||
|
||||
|
||||
# --- Tracker Module Tests ---
|
||||
|
||||
class TestTracker:
    """Tests for cortex.tracker module.

    Covers text cleaning/extraction, database round-tripping (including
    set <-> list conversion of processedIds), the LLM-less analysis
    fallback, and the `list` command's output.
    """

    def test_clean_text(self):
        """Test text cleaning."""
        text = "Hello [timestamp] **bold** https://example.com"
        clean = tracker.clean_text(text)

        assert '[timestamp]' not in clean
        assert '**' not in clean
        assert 'https://' not in clean

    def test_extract_text(self):
        """Test text extraction from events."""
        event = {'payload': {'text_preview': [{'text': 'Test'}]}}
        text = tracker.extract_text(event)
        assert text == 'Test'

    def test_load_save_db(self, temp_cortex_home):
        """Test database load/save."""
        db = tracker.load_db()

        assert 'commitments' in db
        assert 'claims' in db
        assert 'processedIds' in db

        # Add some data
        db['commitments'].append({'id': 'test-001', 'what': 'Test commitment'})
        db['processedIds'].add('evt-001')

        tracker.save_db(db)

        # Reload and verify (processedIds must come back as a set)
        db2 = tracker.load_db()
        assert len(db2['commitments']) == 1
        assert 'evt-001' in db2['processedIds']

    @patch('cortex.tracker.query_llm')
    def test_analyze_message_no_llm(self, mock_llm):
        """Test message analysis fallback."""
        mock_llm.return_value = ''

        result = tracker.analyze_message('Short text', [])
        assert result is None  # Too short after cleaning

    def test_cmd_list(self, temp_cortex_home, capsys):
        """Test list command output."""
        # Create test data
        db = tracker.load_db()
        db['commitments'].append({
            'id': 'commit-test',
            'what': 'Test commitment',
            'who': 'user',
            'date': '2024-01-01',
            'status': 'open',
        })
        tracker.save_db(db)

        tracker.cmd_list()

        captured = capsys.readouterr()
        assert 'Test commitment' in captured.out
|
||||
|
||||
|
||||
# --- Sentinel Module Tests ---
|
||||
|
||||
class TestSentinel:
    """Tests for cortex.sentinel module.

    Covers database initialization, alert deduplication, statistics,
    inventory matching, match analysis, and markdown report generation
    (without the optional LLM summary).
    """

    def test_init_db(self, temp_cortex_home):
        """Test database initialization."""
        sentinel.init_db()

        db_path = temp_cortex_home / 'sentinel' / 'sentinel.db'
        assert db_path.exists()

    def test_add_alert_new(self, temp_cortex_home, sample_alerts):
        """Test adding new alerts."""
        sentinel.init_db()

        alert = sample_alerts[0]
        is_new = sentinel.add_alert(alert)
        assert is_new is True

        # Adding same alert again should return False
        is_new2 = sentinel.add_alert(alert)
        assert is_new2 is False

    def test_get_stats(self, temp_cortex_home, sample_alerts):
        """Test statistics retrieval."""
        sentinel.init_db()

        for alert in sample_alerts:
            sentinel.add_alert(alert)

        stats = sentinel.get_stats()

        assert stats['total_alerts'] == 3
        assert 'by_severity' in stats
        # sample_alerts contains exactly one critical entry.
        assert stats['by_severity'].get('critical', 0) == 1

    def test_check_inventory_match(self):
        """Test inventory matching."""
        text = "Critical vulnerability in OpenSSH and Docker"
        matches = sentinel.check_inventory_match(text)

        # Should match OpenSSH and Docker
        names = [m['name'] for m in matches]
        assert 'OpenSSH' in names
        assert 'Docker' in names

    def test_analyze_matches(self, sample_alerts):
        """Test alert analysis for matches."""
        result = sentinel.analyze_matches(sample_alerts)

        assert 'relevant_alerts' in result
        assert 'critical_relevant' in result
        assert 'category_breakdown' in result

    def test_generate_report(self, temp_cortex_home, sample_alerts):
        """Test report generation."""
        sentinel.init_db()

        for alert in sample_alerts:
            sentinel.add_alert(alert)

        data = sentinel.analyze_matches(sample_alerts)
        report = sentinel.generate_report(data, use_llm=False)

        assert '# 🔒 Security Sentinel Report' in report
        assert 'Database Stats' in report
|
||||
|
||||
|
||||
# --- Integration Tests ---
|
||||
|
||||
class TestIntegration:
    """Integration tests for module interactions.

    Each test invokes a cortex submodule as ``python3 -m <module> --help``
    in a subprocess and checks that the help text mentions an expected
    keyword (or, failing that, that the process at least exited cleanly).
    """

    def _run_module_help(self, module):
        """Run ``python3 -m <module> --help`` and return the CompletedProcess.

        Shared helper: the four CLI tests previously duplicated this
        subprocess boilerplate verbatim.
        """
        import subprocess
        return subprocess.run(
            ['python3', '-m', module, '--help'],
            capture_output=True, text=True
        )

    def test_cli_learn_help(self):
        """Test learn command help."""
        result = self._run_module_help('cortex.learn')
        assert 'preferences' in result.stdout.lower() or result.returncode == 0

    def test_cli_context_help(self):
        """Test context command help."""
        result = self._run_module_help('cortex.context')
        assert 'events' in result.stdout.lower() or result.returncode == 0

    def test_cli_tracker_help(self):
        """Test tracker command help."""
        result = self._run_module_help('cortex.tracker')
        assert 'scan' in result.stdout.lower() or result.returncode == 0

    def test_cli_sentinel_help(self):
        """Test sentinel command help."""
        result = self._run_module_help('cortex.sentinel')
        assert 'scan' in result.stdout.lower() or result.returncode == 0
|
||||
if __name__ == '__main__':
    # Propagate pytest's exit status: a bare pytest.main() call discards
    # the return code, so running this file directly would always exit 0
    # even when tests fail — which silently hides failures from CI.
    raise SystemExit(pytest.main([__file__, '-v']))
|
||||
Loading…
Reference in a new issue