diff --git a/package-lock.json b/package-lock.json index 79940a7..4b05922 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@vainplex/openclaw-cortex", - "version": "0.1.2", + "version": "0.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@vainplex/openclaw-cortex", - "version": "0.1.2", + "version": "0.2.0", "license": "MIT", "devDependencies": { "@types/node": "^22.0.0", diff --git a/package.json b/package.json index 831b8dd..574ecce 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@vainplex/openclaw-cortex", - "version": "0.1.2", + "version": "0.2.0", "description": "OpenClaw plugin: conversation intelligence — thread tracking, decision extraction, boot context, pre-compaction snapshots", "type": "module", "main": "dist/index.js", diff --git a/src/config.ts b/src/config.ts index cdec9ed..8f5b3bc 100644 --- a/src/config.ts +++ b/src/config.ts @@ -31,6 +31,14 @@ export const DEFAULTS: CortexConfig = { patterns: { language: "both", }, + llm: { + enabled: false, + endpoint: "http://localhost:11434/v1", + model: "mistral:7b", + apiKey: "", + timeoutMs: 15000, + batchSize: 3, + }, }; function bool(value: unknown, fallback: boolean): boolean { @@ -59,6 +67,7 @@ export function resolveConfig(pluginConfig?: Record): CortexCon const pc = (raw.preCompaction ?? {}) as Record; const nr = (raw.narrative ?? {}) as Record; const pt = (raw.patterns ?? {}) as Record; + const lm = (raw.llm ?? {}) as Record; return { enabled: bool(raw.enabled, DEFAULTS.enabled), @@ -91,6 +100,14 @@ export function resolveConfig(pluginConfig?: Record): CortexCon patterns: { language: lang(pt.language), }, + llm: { + enabled: bool(lm.enabled, DEFAULTS.llm.enabled), + endpoint: str(lm.endpoint, DEFAULTS.llm.endpoint), + model: str(lm.model, DEFAULTS.llm.model), + apiKey: str(lm.apiKey, DEFAULTS.llm.apiKey), + timeoutMs: int(lm.timeoutMs, DEFAULTS.llm.timeoutMs), + batchSize: int(lm.batchSize, DEFAULTS.llm.batchSize), + }, }; } diff --git a/src/decision-tracker.ts b/src/decision-tracker.ts index 1b9b0cb..1197ac8 100644 --- a/src/decision-tracker.ts +++ b/src/decision-tracker.ts @@ -156,6 +156,29 @@ export class DecisionTracker { } } + /** + * Add a decision directly (from LLM analysis). Deduplicates and persists. + */ + addDecision(what: string, who: string, impact: ImpactLevel | string): void { + const now = new Date(); + if (this.isDuplicate(what, now)) return; + + const validImpact = (["critical", "high", "medium", "low"].includes(impact) ? impact : "medium") as ImpactLevel; + + this.decisions.push({ + id: randomUUID(), + what: what.slice(0, 200), + date: now.toISOString().slice(0, 10), + why: `LLM-detected decision (${who})`, + impact: validImpact, + who, + extracted_at: now.toISOString(), + }); + + this.enforceMax(); + this.persist(); + } + /** * Get all decisions (in-memory). */ diff --git a/src/hooks.ts b/src/hooks.ts index 1a2146b..fd44f91 100644 --- a/src/hooks.ts +++ b/src/hooks.ts @@ -9,6 +9,7 @@ import { ThreadTracker } from "./thread-tracker.js"; import { DecisionTracker } from "./decision-tracker.js"; import { BootContextGenerator } from "./boot-context.js"; import { PreCompaction } from "./pre-compaction.js"; +import { LlmEnhancer, resolveLlmConfig } from "./llm-enhance.js"; /** * Extract message content from a hook event using the fallback chain. @@ -29,6 +30,7 @@ type HookState = { workspace: string | null; threadTracker: ThreadTracker | null; decisionTracker: DecisionTracker | null; + llmEnhancer: LlmEnhancer | null; }; function ensureInit(state: HookState, config: CortexConfig, logger: OpenClawPluginApi["logger"], ctx?: HookContext): void { @@ -41,20 +43,40 @@ function ensureInit(state: HookState, config: CortexConfig, logger: OpenClawPlug if (!state.decisionTracker && config.decisionTracker.enabled) { state.decisionTracker = new DecisionTracker(state.workspace, config.decisionTracker, config.patterns.language, logger); } + if (!state.llmEnhancer && config.llm.enabled) { + state.llmEnhancer = new LlmEnhancer(config.llm, logger); + } } /** Register message hooks (message_received + message_sent). */ function registerMessageHooks(api: OpenClawPluginApi, config: CortexConfig, state: HookState): void { if (!config.threadTracker.enabled && !config.decisionTracker.enabled) return; - const handler = (event: HookEvent, ctx: HookContext, senderOverride?: string) => { + const handler = async (event: HookEvent, ctx: HookContext, senderOverride?: string) => { try { ensureInit(state, config, api.logger, ctx); const content = extractContent(event); const sender = senderOverride ?? extractSender(event); if (!content) return; + + // Regex-based processing (always runs — zero cost) if (config.threadTracker.enabled && state.threadTracker) state.threadTracker.processMessage(content, sender); if (config.decisionTracker.enabled && state.decisionTracker) state.decisionTracker.processMessage(content, sender); + + // LLM enhancement (optional — batched, async, fire-and-forget) + if (state.llmEnhancer) { + const role = senderOverride ? "assistant" as const : "user" as const; + const analysis = await state.llmEnhancer.addMessage(content, sender, role); + if (analysis) { + // Apply LLM findings on top of regex results + if (state.threadTracker) state.threadTracker.applyLlmAnalysis(analysis); + if (state.decisionTracker) { + for (const dec of analysis.decisions) { + state.decisionTracker.addDecision(dec.what, dec.who, dec.impact); + } + } + } + } } catch (err) { api.logger.warn(`[cortex] message hook error: ${err}`); } @@ -109,13 +131,13 @@ function registerCompactionHooks(api: OpenClawPluginApi, config: CortexConfig, s * Each handler is wrapped in try/catch — never throws. */ export function registerCortexHooks(api: OpenClawPluginApi, config: CortexConfig): void { - const state: HookState = { workspace: null, threadTracker: null, decisionTracker: null }; + const state: HookState = { workspace: null, threadTracker: null, decisionTracker: null, llmEnhancer: null }; registerMessageHooks(api, config, state); registerSessionHooks(api, config, state); registerCompactionHooks(api, config, state); api.logger.info( - `[cortex] Hooks registered — threads:${config.threadTracker.enabled} decisions:${config.decisionTracker.enabled} boot:${config.bootContext.enabled} compaction:${config.preCompaction.enabled}`, + `[cortex] Hooks registered — threads:${config.threadTracker.enabled} decisions:${config.decisionTracker.enabled} boot:${config.bootContext.enabled} compaction:${config.preCompaction.enabled} llm:${config.llm.enabled}${config.llm.enabled ? ` (${config.llm.model}@${config.llm.endpoint})` : ""}`, ); } diff --git a/src/llm-enhance.ts b/src/llm-enhance.ts new file mode 100644 index 0000000..cb8072b --- /dev/null +++ b/src/llm-enhance.ts @@ -0,0 +1,258 @@ +import { request } from "node:http"; +import { URL } from "node:url"; +import type { PluginLogger } from "./types.js"; + +/** + * LLM Enhancement — optional AI-powered analysis layered on top of regex patterns. + * + * When enabled, sends conversation snippets to a local or remote LLM for deeper + * thread/decision/closure detection. Falls back gracefully to regex-only on failure. + * + * Supports any OpenAI-compatible API (Ollama, vLLM, OpenRouter, OpenAI, etc.) + */ + +export type LlmConfig = { + enabled: boolean; + /** OpenAI-compatible endpoint, e.g. "http://localhost:11434/v1" */ + endpoint: string; + /** Model identifier, e.g. "mistral:7b" or "gpt-4o-mini" */ + model: string; + /** API key (optional, for cloud providers) */ + apiKey: string; + /** Timeout in ms for LLM calls */ + timeoutMs: number; + /** Minimum message count before triggering LLM (batches for efficiency) */ + batchSize: number; +}; + +export const LLM_DEFAULTS: LlmConfig = { + enabled: false, + endpoint: "http://localhost:11434/v1", + model: "mistral:7b", + apiKey: "", + timeoutMs: 15000, + batchSize: 3, +}; + +export type LlmAnalysis = { + threads: Array<{ + title: string; + status: "open" | "closed"; + summary?: string; + }>; + decisions: Array<{ + what: string; + who: string; + impact: "high" | "medium" | "low"; + }>; + closures: string[]; + mood: string; +}; + +const SYSTEM_PROMPT = `You are a conversation analyst. Given a snippet of conversation between a user and an AI assistant, extract: + +1. **threads**: Active topics being discussed. Each has a title (short, specific) and status (open/closed). +2. **decisions**: Any decisions made. Include what was decided, who decided, and impact (high/medium/low). +3. **closures**: Thread titles that were completed/resolved in this snippet. +4. **mood**: Overall conversation mood (neutral/frustrated/excited/tense/productive/exploratory). + +Rules: +- Only extract REAL topics, not meta-conversation ("how are you", greetings, etc.) +- Thread titles should be specific and actionable ("auth migration to OAuth2", not "the thing") +- Decisions must be actual commitments, not questions or suggestions +- Be conservative — when in doubt, don't extract + +Respond ONLY with valid JSON matching this schema: +{"threads":[{"title":"...","status":"open|closed","summary":"..."}],"decisions":[{"what":"...","who":"...","impact":"high|medium|low"}],"closures":["thread title"],"mood":"neutral"}`; + +/** + * Call an OpenAI-compatible chat completion API. + */ +function callLlm( + config: LlmConfig, + messages: Array<{ role: string; content: string }>, + logger: PluginLogger, +): Promise { + return new Promise((resolve) => { + try { + const url = new URL(`${config.endpoint}/chat/completions`); + const body = JSON.stringify({ + model: config.model, + messages, + temperature: 0.1, + max_tokens: 1000, + response_format: { type: "json_object" }, + }); + + const headers: Record = { + "Content-Type": "application/json", + "Content-Length": String(Buffer.byteLength(body)), + }; + if (config.apiKey) { + headers["Authorization"] = `Bearer ${config.apiKey}`; + } + + const proto = url.protocol === "https:" ? require("node:https") : require("node:http"); + const req = proto.request( + { + hostname: url.hostname, + port: url.port || (url.protocol === "https:" ? 443 : 80), + path: url.pathname, + method: "POST", + headers, + timeout: config.timeoutMs, + }, + (res: any) => { + let data = ""; + res.on("data", (chunk: string) => (data += chunk)); + res.on("end", () => { + try { + const parsed = JSON.parse(data); + const content = parsed?.choices?.[0]?.message?.content; + resolve(content ?? null); + } catch { + logger.warn(`[cortex-llm] Failed to parse LLM response`); + resolve(null); + } + }); + }, + ); + + req.on("error", (err: Error) => { + logger.warn(`[cortex-llm] Request error: ${err.message}`); + resolve(null); + }); + + req.on("timeout", () => { + req.destroy(); + logger.warn(`[cortex-llm] Request timed out (${config.timeoutMs}ms)`); + resolve(null); + }); + + req.write(body); + req.end(); + } catch (err) { + logger.warn(`[cortex-llm] Exception: ${err}`); + resolve(null); + } + }); +} + +/** + * Parse LLM JSON response into structured analysis. + * Returns null on any parse failure (graceful degradation). + */ +function parseAnalysis(raw: string, logger: PluginLogger): LlmAnalysis | null { + try { + const parsed = JSON.parse(raw); + return { + threads: Array.isArray(parsed.threads) + ? parsed.threads.filter( + (t: any) => typeof t.title === "string" && t.title.length > 2, + ) + : [], + decisions: Array.isArray(parsed.decisions) + ? parsed.decisions.filter( + (d: any) => typeof d.what === "string" && d.what.length > 5, + ) + : [], + closures: Array.isArray(parsed.closures) + ? parsed.closures.filter((c: any) => typeof c === "string") + : [], + mood: typeof parsed.mood === "string" ? parsed.mood : "neutral", + }; + } catch { + logger.warn(`[cortex-llm] Failed to parse analysis JSON`); + return null; + } +} + +/** + * Message buffer for batching LLM calls. + */ +export class LlmEnhancer { + private buffer: Array<{ role: string; content: string; sender: string }> = []; + private readonly config: LlmConfig; + private readonly logger: PluginLogger; + private lastCallMs = 0; + private readonly cooldownMs = 5000; + + constructor(config: LlmConfig, logger: PluginLogger) { + this.config = config; + this.logger = logger; + } + + /** + * Buffer a message. Returns analysis when batch is full, null otherwise. + */ + async addMessage( + content: string, + sender: string, + role: "user" | "assistant", + ): Promise { + if (!this.config.enabled) return null; + + this.buffer.push({ role, content, sender }); + + if (this.buffer.length < this.config.batchSize) return null; + + // Cooldown check + const now = Date.now(); + if (now - this.lastCallMs < this.cooldownMs) return null; + this.lastCallMs = now; + + // Flush buffer + const batch = this.buffer.splice(0); + return this.analyze(batch); + } + + /** + * Force-analyze remaining buffer (e.g. before compaction). + */ + async flush(): Promise { + if (!this.config.enabled || this.buffer.length === 0) return null; + const batch = this.buffer.splice(0); + return this.analyze(batch); + } + + private async analyze( + messages: Array<{ role: string; content: string; sender: string }>, + ): Promise { + const snippet = messages + .map((m) => `[${m.sender}]: ${m.content}`) + .join("\n\n"); + + const raw = await callLlm( + this.config, + [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: snippet }, + ], + this.logger, + ); + + if (!raw) return null; + + const analysis = parseAnalysis(raw, this.logger); + if (analysis) { + const stats = `threads=${analysis.threads.length} decisions=${analysis.decisions.length} closures=${analysis.closures.length}`; + this.logger.info(`[cortex-llm] Analysis: ${stats}`); + } + return analysis; + } +} + +/** + * Resolve LLM config from plugin config. + */ +export function resolveLlmConfig(raw?: Record): LlmConfig { + if (!raw) return { ...LLM_DEFAULTS }; + return { + enabled: typeof raw.enabled === "boolean" ? raw.enabled : LLM_DEFAULTS.enabled, + endpoint: typeof raw.endpoint === "string" ? raw.endpoint : LLM_DEFAULTS.endpoint, + model: typeof raw.model === "string" ? raw.model : LLM_DEFAULTS.model, + apiKey: typeof raw.apiKey === "string" ? raw.apiKey : LLM_DEFAULTS.apiKey, + timeoutMs: typeof raw.timeoutMs === "number" ? raw.timeoutMs : LLM_DEFAULTS.timeoutMs, + batchSize: typeof raw.batchSize === "number" ? raw.batchSize : LLM_DEFAULTS.batchSize, + }; +} diff --git a/src/patterns.ts b/src/patterns.ts index e03b685..a03e7e4 100644 --- a/src/patterns.ts +++ b/src/patterns.ts @@ -32,13 +32,23 @@ const WAIT_PATTERNS_DE = [ ]; const TOPIC_PATTERNS_EN = [ - /(?:back to|now about|regarding)\s+(\w[\w\s-]{2,30})/i, + /(?:back to|now about|regarding|let's (?:talk|discuss|look at))\s+(?:the\s+)?(\w[\w\s-]{3,40})/i, ]; const TOPIC_PATTERNS_DE = [ - /(?:zurück zu|jetzt zu|bzgl\.?|wegen)\s+(\w[\w\s-]{2,30})/i, + /(?:zurück zu|jetzt zu|bzgl\.?|wegen|lass uns (?:über|mal))\s+(?:dem?|die|das)?\s*(\w[\w\s-]{3,40})/i, ]; +/** Words that should never be thread titles (noise filter) */ +const TOPIC_BLACKLIST = new Set([ + "it", "that", "this", "the", "them", "what", "which", "there", + "das", "die", "der", "es", "was", "hier", "dort", + "nothing", "something", "everything", "nichts", "etwas", "alles", + "me", "you", "him", "her", "us", "mir", "dir", "ihm", "uns", + "today", "tomorrow", "yesterday", "heute", "morgen", "gestern", + "noch", "schon", "jetzt", "dann", "also", "aber", "oder", +]); + const MOOD_PATTERNS: Record, RegExp> = { frustrated: /(?:fuck|shit|mist|nervig|genervt|damn|wtf|argh|schon wieder|zum kotzen|sucks)/i, excited: /(?:geil|nice|awesome|krass|boom|läuft|yes!|🎯|🚀|perfekt|brilliant|mega|sick)/i, @@ -115,6 +125,24 @@ export function detectMood(text: string): Mood { return lastMood; } +/** + * Check if a topic candidate is noise (too short, blacklisted, or garbage). + */ +export function isNoiseTopic(topic: string): boolean { + const trimmed = topic.trim(); + if (trimmed.length < 4) return true; + // Single word that's in blacklist + const words = trimmed.toLowerCase().split(/\s+/); + if (words.length === 1 && TOPIC_BLACKLIST.has(words[0])) return true; + // All words are blacklisted + if (words.every(w => TOPIC_BLACKLIST.has(w) || w.length < 3)) return true; + // Looks like a sentence fragment (starts with pronoun or blacklisted word) + if (/^(ich|i|we|wir|du|er|sie|he|she|it|es|nichts|nothing|etwas|something)\s/i.test(trimmed)) return true; + // Contains line breaks or is too long for a title + if (trimmed.includes("\n") || trimmed.length > 60) return true; + return false; +} + /** High-impact keywords for decision impact inference */ export const HIGH_IMPACT_KEYWORDS = [ "architecture", "architektur", "security", "sicherheit", diff --git a/src/thread-tracker.ts b/src/thread-tracker.ts index 0ee425d..6841342 100644 --- a/src/thread-tracker.ts +++ b/src/thread-tracker.ts @@ -7,7 +7,7 @@ import type { ThreadPriority, PluginLogger, } from "./types.js"; -import { getPatterns, detectMood, HIGH_IMPACT_KEYWORDS } from "./patterns.js"; +import { getPatterns, detectMood, HIGH_IMPACT_KEYWORDS, isNoiseTopic } from "./patterns.js"; import type { PatternLanguage } from "./patterns.js"; import { loadJson, saveJson, rebootDir, ensureRebootDir } from "./storage.js"; @@ -127,9 +127,10 @@ export class ThreadTracker { this.sessionMood = data.session_mood ?? "neutral"; } - /** Create new threads from topic signals. */ + /** Create new threads from topic signals (with noise filtering). */ private createFromTopics(topics: string[], sender: string, mood: string, now: string): void { for (const topic of topics) { + if (isNoiseTopic(topic)) continue; const exists = this.threads.some( t => t.title.toLowerCase() === topic.toLowerCase() || matchesThread(t, topic), ); @@ -143,6 +144,52 @@ export class ThreadTracker { } } + /** + * Apply LLM analysis results — creates threads, closes threads, adds decisions. + * Called from hooks when LLM enhance is enabled. + */ + applyLlmAnalysis(analysis: { + threads: Array<{ title: string; status: "open" | "closed"; summary?: string }>; + closures: string[]; + mood: string; + }): void { + const now = new Date().toISOString(); + + // Create threads from LLM + for (const lt of analysis.threads) { + if (isNoiseTopic(lt.title)) continue; + const exists = this.threads.some( + t => t.title.toLowerCase() === lt.title.toLowerCase() || matchesThread(t, lt.title), + ); + if (!exists) { + this.threads.push({ + id: randomUUID(), title: lt.title, status: lt.status, + priority: inferPriority(lt.title), summary: lt.summary ?? "LLM-detected", + decisions: [], waiting_for: null, mood: analysis.mood ?? "neutral", + last_activity: now, created: now, + }); + } + } + + // Close threads from LLM closures + for (const closure of analysis.closures) { + for (const thread of this.threads) { + if (thread.status === "open" && matchesThread(thread, closure)) { + thread.status = "closed"; + thread.last_activity = now; + } + } + } + + // Update session mood + if (analysis.mood && analysis.mood !== "neutral") { + this.sessionMood = analysis.mood; + } + + this.dirty = true; + this.persist(); + } + /** Close threads matching closure signals. */ private closeMatching(content: string, closures: boolean[], now: string): void { if (closures.length === 0) return; diff --git a/src/types.ts b/src/types.ts index 12ef733..2e71adc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -245,6 +245,14 @@ export type CortexConfig = { patterns: { language: "en" | "de" | "both"; }; + llm: { + enabled: boolean; + endpoint: string; + model: string; + apiKey: string; + timeoutMs: number; + batchSize: number; + }; }; // ============================================================ diff --git a/test/llm-enhance.test.ts b/test/llm-enhance.test.ts new file mode 100644 index 0000000..0c86fc1 --- /dev/null +++ b/test/llm-enhance.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect } from "vitest"; +import { resolveLlmConfig, LlmEnhancer, LLM_DEFAULTS } from "../src/llm-enhance.js"; + +const mockLogger = { + info: () => {}, + warn: () => {}, + error: () => {}, + debug: () => {}, +}; + +describe("resolveLlmConfig", () => { + it("returns defaults when no config provided", () => { + const config = resolveLlmConfig(undefined); + expect(config).toEqual(LLM_DEFAULTS); + expect(config.enabled).toBe(false); + }); + + it("returns defaults for empty object", () => { + const config = resolveLlmConfig({}); + expect(config).toEqual(LLM_DEFAULTS); + }); + + it("merges partial config with defaults", () => { + const config = resolveLlmConfig({ + enabled: true, + model: "qwen2.5:7b", + }); + expect(config.enabled).toBe(true); + expect(config.model).toBe("qwen2.5:7b"); + expect(config.endpoint).toBe(LLM_DEFAULTS.endpoint); + expect(config.timeoutMs).toBe(LLM_DEFAULTS.timeoutMs); + expect(config.batchSize).toBe(LLM_DEFAULTS.batchSize); + }); + + it("respects custom endpoint and apiKey", () => { + const config = resolveLlmConfig({ + enabled: true, + endpoint: "https://api.openai.com/v1", + model: "gpt-4o-mini", + apiKey: "sk-test", + timeoutMs: 30000, + batchSize: 5, + }); + expect(config.endpoint).toBe("https://api.openai.com/v1"); + expect(config.apiKey).toBe("sk-test"); + expect(config.timeoutMs).toBe(30000); + expect(config.batchSize).toBe(5); + }); + + it("ignores invalid types", () => { + const config = resolveLlmConfig({ + enabled: "yes" as any, + model: 42 as any, + timeoutMs: "fast" as any, + }); + expect(config.enabled).toBe(LLM_DEFAULTS.enabled); + expect(config.model).toBe(LLM_DEFAULTS.model); + expect(config.timeoutMs).toBe(LLM_DEFAULTS.timeoutMs); + }); +}); + +describe("LlmEnhancer", () => { + it("returns null when disabled", async () => { + const enhancer = new LlmEnhancer({ ...LLM_DEFAULTS, enabled: false }, mockLogger); + const result = await enhancer.addMessage("test message", "user1", "user"); + expect(result).toBeNull(); + }); + + it("buffers messages until batchSize", async () => { + const enhancer = new LlmEnhancer( + { ...LLM_DEFAULTS, enabled: true, batchSize: 3 }, + mockLogger, + ); + // First two messages should buffer (no LLM call) + const r1 = await enhancer.addMessage("hello", "user1", "user"); + expect(r1).toBeNull(); + const r2 = await enhancer.addMessage("world", "assistant", "assistant"); + expect(r2).toBeNull(); + // Third would trigger LLM but will fail gracefully (no server) + const r3 = await enhancer.addMessage("test", "user1", "user"); + // Returns null because localhost:11434 is not guaranteed + // The important thing is it doesn't throw + expect(r3 === null || typeof r3 === "object").toBe(true); + }); + + it("flush returns null when no messages buffered", async () => { + const enhancer = new LlmEnhancer({ ...LLM_DEFAULTS, enabled: true }, mockLogger); + const result = await enhancer.flush(); + expect(result).toBeNull(); + }); + + it("flush returns null when disabled", async () => { + const enhancer = new LlmEnhancer({ ...LLM_DEFAULTS, enabled: false }, mockLogger); + const result = await enhancer.flush(); + expect(result).toBeNull(); + }); +}); diff --git a/test/noise-filter.test.ts b/test/noise-filter.test.ts new file mode 100644 index 0000000..815abf2 --- /dev/null +++ b/test/noise-filter.test.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from "vitest"; +import { isNoiseTopic } from "../src/patterns.js"; + +describe("isNoiseTopic", () => { + it("rejects short strings", () => { + expect(isNoiseTopic("foo")).toBe(true); + expect(isNoiseTopic("ab")).toBe(true); + expect(isNoiseTopic("")).toBe(true); + }); + + it("rejects single blacklisted words", () => { + expect(isNoiseTopic("that")).toBe(true); + expect(isNoiseTopic("this")).toBe(true); + expect(isNoiseTopic("nichts")).toBe(true); + expect(isNoiseTopic("alles")).toBe(true); + }); + + it("rejects all-blacklisted multi-word", () => { + expect(isNoiseTopic("das was es")).toBe(true); + expect(isNoiseTopic("the that it")).toBe(true); + }); + + it("rejects sentence fragments starting with pronouns", () => { + expect(isNoiseTopic("ich habe nichts gepostet")).toBe(true); + expect(isNoiseTopic("we should do something")).toBe(true); + expect(isNoiseTopic("er hat gesagt")).toBe(true); + expect(isNoiseTopic("I think maybe")).toBe(true); + }); + + it("rejects topics with newlines", () => { + expect(isNoiseTopic("line one\nline two")).toBe(true); + }); + + it("rejects topics longer than 60 chars", () => { + const long = "a".repeat(61); + expect(isNoiseTopic(long)).toBe(true); + }); + + it("accepts valid topic names", () => { + expect(isNoiseTopic("Auth Migration")).toBe(false); + expect(isNoiseTopic("Plugin-Repo Setup")).toBe(false); + expect(isNoiseTopic("NATS Event Store")).toBe(false); + expect(isNoiseTopic("Cortex Demo")).toBe(false); + expect(isNoiseTopic("Security Audit")).toBe(false); + expect(isNoiseTopic("Deployment Pipeline")).toBe(false); + }); + + it("accepts german topic names", () => { + expect(isNoiseTopic("Darkplex Analyse")).toBe(false); + expect(isNoiseTopic("Credential Rotation")).toBe(false); + expect(isNoiseTopic("Thread Tracking Qualität")).toBe(false); + }); + + it("rejects 'nichts gepostet habe' (real-world noise)", () => { + expect(isNoiseTopic("nichts gepostet habe")).toBe(true); + }); +}); diff --git a/test/patterns.test.ts b/test/patterns.test.ts index cbd6ea4..ba0a8c1 100644 --- a/test/patterns.test.ts +++ b/test/patterns.test.ts @@ -260,10 +260,10 @@ describe("topic patterns", () => { expect(anyMatch(topic, "just a random sentence")).toBe(false); }); - it("limits captured topic to 30 chars", () => { - const topics = captureTopics(topic, "back to the very long topic name that exceeds thirty characters limit here"); + it("limits captured topic to 40 chars", () => { + const topics = captureTopics(topic, "back to the very long topic name that exceeds forty characters limit here and keeps going"); if (topics.length > 0) { - expect(topics[0].length).toBeLessThanOrEqual(31); + expect(topics[0].length).toBeLessThanOrEqual(41); } }); }); @@ -274,7 +274,7 @@ describe("topic patterns", () => { it("captures topic after 'zurück zu'", () => { const topics = captureTopics(topic, "Zurück zu der Auth-Migration"); expect(topics.length).toBeGreaterThan(0); - expect(topics[0]).toContain("der Auth-Migration"); + expect(topics[0]).toContain("Auth-Migration"); }); it("captures topic after 'jetzt zu'", () => {