feat: optional LLM enhancement + noise filter for topic detection

- Add llm-enhance.ts: optional OpenAI-compatible LLM for deeper analysis
  - Supports any provider: Ollama, OpenAI, OpenRouter, vLLM, etc.
  - Batched calls (configurable batchSize, default 3 messages)
  - Cooldown + timeout + graceful degradation (falls back to regex)
  - JSON structured output: threads, decisions, closures, mood

- Add noise filter (isNoiseTopic):
  - Rejects short/blacklisted/pronoun-starting fragments
  - Fixes 'nichts gepostet habe' type garbage threads

- Improve patterns:
  - Topic regex: min 3 chars, max 40 (was 2-30)
  - Add 'let's talk/discuss/look at' and 'lass uns über/mal' triggers
  - German patterns handle optional articles (dem/die/das)

- Wire LLM into hooks:
  - Regex runs first (zero cost, always)
  - LLM batches and enhances on top (async, fire-and-forget)
  - ThreadTracker.applyLlmAnalysis() merges LLM findings
  - DecisionTracker.addDecision() for direct LLM-detected decisions

- Config: new 'llm' section (disabled by default)
- 288 tests passing (18 new)
- Version 0.2.0

BREAKING: None — LLM is opt-in, regex behavior unchanged
This commit is contained in:
Claudia 2026-02-17 14:04:43 +01:00
parent 44c78eaf5a
commit 0d592b8f2b
12 changed files with 571 additions and 14 deletions

4
package-lock.json generated
View file

@ -1,12 +1,12 @@
{
"name": "@vainplex/openclaw-cortex",
"version": "0.1.2",
"version": "0.2.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@vainplex/openclaw-cortex",
"version": "0.1.2",
"version": "0.2.0",
"license": "MIT",
"devDependencies": {
"@types/node": "^22.0.0",

View file

@ -1,6 +1,6 @@
{
"name": "@vainplex/openclaw-cortex",
"version": "0.1.2",
"version": "0.2.0",
"description": "OpenClaw plugin: conversation intelligence — thread tracking, decision extraction, boot context, pre-compaction snapshots",
"type": "module",
"main": "dist/index.js",

View file

@ -31,6 +31,14 @@ export const DEFAULTS: CortexConfig = {
patterns: {
language: "both",
},
llm: {
enabled: false,
endpoint: "http://localhost:11434/v1",
model: "mistral:7b",
apiKey: "",
timeoutMs: 15000,
batchSize: 3,
},
};
function bool(value: unknown, fallback: boolean): boolean {
@ -59,6 +67,7 @@ export function resolveConfig(pluginConfig?: Record<string, unknown>): CortexCon
const pc = (raw.preCompaction ?? {}) as Record<string, unknown>;
const nr = (raw.narrative ?? {}) as Record<string, unknown>;
const pt = (raw.patterns ?? {}) as Record<string, unknown>;
const lm = (raw.llm ?? {}) as Record<string, unknown>;
return {
enabled: bool(raw.enabled, DEFAULTS.enabled),
@ -91,6 +100,14 @@ export function resolveConfig(pluginConfig?: Record<string, unknown>): CortexCon
patterns: {
language: lang(pt.language),
},
llm: {
enabled: bool(lm.enabled, DEFAULTS.llm.enabled),
endpoint: str(lm.endpoint, DEFAULTS.llm.endpoint),
model: str(lm.model, DEFAULTS.llm.model),
apiKey: str(lm.apiKey, DEFAULTS.llm.apiKey),
timeoutMs: int(lm.timeoutMs, DEFAULTS.llm.timeoutMs),
batchSize: int(lm.batchSize, DEFAULTS.llm.batchSize),
},
};
}

View file

@ -156,6 +156,29 @@ export class DecisionTracker {
}
}
/**
 * Add a decision directly (from LLM analysis). Deduplicates and persists.
 *
 * @param what   Decision text; truncated to 200 chars when stored.
 * @param who    Who made the decision (recorded inside the "why" field).
 * @param impact Impact level; unknown values fall back to "medium".
 */
addDecision(what: string, who: string, impact: ImpactLevel | string): void {
  const now = new Date();
  // Skip anything that duplicates a recently recorded decision.
  if (this.isDuplicate(what, now)) return;
  const knownImpacts = ["critical", "high", "medium", "low"];
  const safeImpact = (knownImpacts.includes(impact) ? impact : "medium") as ImpactLevel;
  const timestamp = now.toISOString();
  this.decisions.push({
    id: randomUUID(),
    what: what.slice(0, 200),
    date: timestamp.slice(0, 10),
    why: `LLM-detected decision (${who})`,
    impact: safeImpact,
    who,
    extracted_at: timestamp,
  });
  // Trim to the configured maximum, then write through to disk.
  this.enforceMax();
  this.persist();
}
/**
* Get all decisions (in-memory).
*/

View file

@ -9,6 +9,7 @@ import { ThreadTracker } from "./thread-tracker.js";
import { DecisionTracker } from "./decision-tracker.js";
import { BootContextGenerator } from "./boot-context.js";
import { PreCompaction } from "./pre-compaction.js";
import { LlmEnhancer, resolveLlmConfig } from "./llm-enhance.js";
/**
* Extract message content from a hook event using the fallback chain.
@ -29,6 +30,7 @@ type HookState = {
workspace: string | null;
threadTracker: ThreadTracker | null;
decisionTracker: DecisionTracker | null;
llmEnhancer: LlmEnhancer | null;
};
function ensureInit(state: HookState, config: CortexConfig, logger: OpenClawPluginApi["logger"], ctx?: HookContext): void {
@ -41,20 +43,40 @@ function ensureInit(state: HookState, config: CortexConfig, logger: OpenClawPlug
if (!state.decisionTracker && config.decisionTracker.enabled) {
state.decisionTracker = new DecisionTracker(state.workspace, config.decisionTracker, config.patterns.language, logger);
}
if (!state.llmEnhancer && config.llm.enabled) {
state.llmEnhancer = new LlmEnhancer(config.llm, logger);
}
}
/** Register message hooks (message_received + message_sent). */
function registerMessageHooks(api: OpenClawPluginApi, config: CortexConfig, state: HookState): void {
if (!config.threadTracker.enabled && !config.decisionTracker.enabled) return;
const handler = (event: HookEvent, ctx: HookContext, senderOverride?: string) => {
const handler = async (event: HookEvent, ctx: HookContext, senderOverride?: string) => {
try {
ensureInit(state, config, api.logger, ctx);
const content = extractContent(event);
const sender = senderOverride ?? extractSender(event);
if (!content) return;
// Regex-based processing (always runs — zero cost)
if (config.threadTracker.enabled && state.threadTracker) state.threadTracker.processMessage(content, sender);
if (config.decisionTracker.enabled && state.decisionTracker) state.decisionTracker.processMessage(content, sender);
// LLM enhancement (optional — batched, async, fire-and-forget)
if (state.llmEnhancer) {
const role = senderOverride ? "assistant" as const : "user" as const;
const analysis = await state.llmEnhancer.addMessage(content, sender, role);
if (analysis) {
// Apply LLM findings on top of regex results
if (state.threadTracker) state.threadTracker.applyLlmAnalysis(analysis);
if (state.decisionTracker) {
for (const dec of analysis.decisions) {
state.decisionTracker.addDecision(dec.what, dec.who, dec.impact);
}
}
}
}
} catch (err) {
api.logger.warn(`[cortex] message hook error: ${err}`);
}
@ -109,13 +131,13 @@ function registerCompactionHooks(api: OpenClawPluginApi, config: CortexConfig, s
* Each handler is wrapped in try/catch and never throws.
*/
export function registerCortexHooks(api: OpenClawPluginApi, config: CortexConfig): void {
const state: HookState = { workspace: null, threadTracker: null, decisionTracker: null };
const state: HookState = { workspace: null, threadTracker: null, decisionTracker: null, llmEnhancer: null };
registerMessageHooks(api, config, state);
registerSessionHooks(api, config, state);
registerCompactionHooks(api, config, state);
api.logger.info(
`[cortex] Hooks registered — threads:${config.threadTracker.enabled} decisions:${config.decisionTracker.enabled} boot:${config.bootContext.enabled} compaction:${config.preCompaction.enabled}`,
`[cortex] Hooks registered — threads:${config.threadTracker.enabled} decisions:${config.decisionTracker.enabled} boot:${config.bootContext.enabled} compaction:${config.preCompaction.enabled} llm:${config.llm.enabled}${config.llm.enabled ? ` (${config.llm.model}@${config.llm.endpoint})` : ""}`,
);
}

258
src/llm-enhance.ts Normal file
View file

@ -0,0 +1,258 @@
import { request } from "node:http";
import { URL } from "node:url";
import type { PluginLogger } from "./types.js";
/**
* LLM Enhancement — optional AI-powered analysis layered on top of regex patterns.
*
* When enabled, sends conversation snippets to a local or remote LLM for deeper
* thread/decision/closure detection. Falls back gracefully to regex-only on failure.
*
* Supports any OpenAI-compatible API (Ollama, vLLM, OpenRouter, OpenAI, etc.)
*/
export type LlmConfig = {
enabled: boolean;
/** OpenAI-compatible endpoint, e.g. "http://localhost:11434/v1" */
endpoint: string;
/** Model identifier, e.g. "mistral:7b" or "gpt-4o-mini" */
model: string;
/** API key (optional, for cloud providers) */
apiKey: string;
/** Timeout in ms for LLM calls */
timeoutMs: number;
/** Minimum message count before triggering LLM (batches for efficiency) */
batchSize: number;
};
export const LLM_DEFAULTS: LlmConfig = {
enabled: false,
endpoint: "http://localhost:11434/v1",
model: "mistral:7b",
apiKey: "",
timeoutMs: 15000,
batchSize: 3,
};
/** Structured result of one LLM analysis pass (see SYSTEM_PROMPT for the schema the model is asked to emit). */
export type LlmAnalysis = {
/** Active topics detected in the snippet. */
threads: Array<{
title: string;
status: "open" | "closed";
summary?: string;
}>;
/** Decisions the model judged to be actual commitments. */
decisions: Array<{
what: string;
who: string;
impact: "high" | "medium" | "low";
}>;
/** Titles of threads the model considers completed/resolved. */
closures: string[];
/** Overall conversation mood, e.g. "neutral", "frustrated", "productive". */
mood: string;
};
const SYSTEM_PROMPT = `You are a conversation analyst. Given a snippet of conversation between a user and an AI assistant, extract:
1. **threads**: Active topics being discussed. Each has a title (short, specific) and status (open/closed).
2. **decisions**: Any decisions made. Include what was decided, who decided, and impact (high/medium/low).
3. **closures**: Thread titles that were completed/resolved in this snippet.
4. **mood**: Overall conversation mood (neutral/frustrated/excited/tense/productive/exploratory).
Rules:
- Only extract REAL topics, not meta-conversation ("how are you", greetings, etc.)
- Thread titles should be specific and actionable ("auth migration to OAuth2", not "the thing")
- Decisions must be actual commitments, not questions or suggestions
- Be conservative when in doubt, don't extract
Respond ONLY with valid JSON matching this schema:
{"threads":[{"title":"...","status":"open|closed","summary":"..."}],"decisions":[{"what":"...","who":"...","impact":"high|medium|low"}],"closures":["thread title"],"mood":"neutral"}`;
/**
 * Call an OpenAI-compatible chat completion API.
 *
 * Sends a chat-completions request and returns the first choice's message
 * content, or null on any failure (network error, timeout, malformed body).
 * Never rejects — callers rely on null for graceful degradation to regex-only.
 *
 * FIX: the previous implementation called require("node:https")/require("node:http")
 * inside an ES module ("type": "module"), where `require` is not defined, so every
 * call threw synchronously and resolved null — the LLM path could never succeed.
 * Global fetch (Node 18+) handles both http and https and supports timeouts via
 * AbortSignal.timeout.
 *
 * @param config   Resolved LLM config (endpoint, model, apiKey, timeoutMs).
 * @param messages Chat messages in OpenAI format ({role, content}).
 * @param logger   Plugin logger for warnings on failure.
 */
async function callLlm(
  config: LlmConfig,
  messages: Array<{ role: string; content: string }>,
  logger: PluginLogger,
): Promise<string | null> {
  try {
    const body = JSON.stringify({
      model: config.model,
      messages,
      temperature: 0.1,
      max_tokens: 1000,
      response_format: { type: "json_object" },
    });
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };
    // Bearer auth only for cloud providers; local servers (Ollama) need none.
    if (config.apiKey) {
      headers["Authorization"] = `Bearer ${config.apiKey}`;
    }
    const res = await fetch(`${config.endpoint}/chat/completions`, {
      method: "POST",
      headers,
      body,
      // Aborts the request (and rejects the await) after timeoutMs.
      signal: AbortSignal.timeout(config.timeoutMs),
    });
    const parsed: any = await res.json();
    return parsed?.choices?.[0]?.message?.content ?? null;
  } catch (err) {
    // Covers connection errors, timeouts (AbortError), and non-JSON bodies.
    const msg = err instanceof Error ? err.message : String(err);
    logger.warn(`[cortex-llm] Request failed: ${msg}`);
    return null;
  }
}
/**
 * Parse LLM JSON response into a structured LlmAnalysis.
 * Returns null on any JSON parse failure (graceful degradation).
 *
 * FIX: the previous version passed `status`, `who`, `impact` and `summary`
 * through unvalidated, so the result could violate the declared LlmAnalysis
 * type (e.g. status "done") — and applyLlmAnalysis stores `status` verbatim.
 * Invalid enum values are now coerced to safe defaults.
 */
function parseAnalysis(raw: string, logger: PluginLogger): LlmAnalysis | null {
  try {
    const parsed = JSON.parse(raw);
    const threads = Array.isArray(parsed?.threads)
      ? parsed.threads
          .filter((t: any) => t && typeof t.title === "string" && t.title.length > 2)
          .map((t: any) => ({
            title: t.title,
            // Anything other than an explicit "closed" is treated as open.
            status: t.status === "closed" ? ("closed" as const) : ("open" as const),
            summary: typeof t.summary === "string" ? t.summary : undefined,
          }))
      : [];
    const decisions = Array.isArray(parsed?.decisions)
      ? parsed.decisions
          .filter((d: any) => d && typeof d.what === "string" && d.what.length > 5)
          .map((d: any) => ({
            what: d.what,
            who: typeof d.who === "string" ? d.who : "unknown",
            // Out-of-range impact degrades to "medium" (same default DecisionTracker uses).
            impact: (["high", "medium", "low"].includes(d.impact) ? d.impact : "medium") as
              | "high"
              | "medium"
              | "low",
          }))
      : [];
    const closures = Array.isArray(parsed?.closures)
      ? parsed.closures.filter((c: any) => typeof c === "string")
      : [];
    return {
      threads,
      decisions,
      closures,
      mood: typeof parsed?.mood === "string" ? parsed.mood : "neutral",
    };
  } catch {
    logger.warn(`[cortex-llm] Failed to parse analysis JSON`);
    return null;
  }
}
/**
 * Message buffer for batching LLM calls.
 *
 * Collects incoming messages and triggers one LLM analysis per full batch,
 * with a cooldown between calls. Every failure path resolves to null so the
 * caller can fall back to regex-only analysis.
 */
export class LlmEnhancer {
  private buffer: Array<{ role: string; content: string; sender: string }> = [];
  private readonly config: LlmConfig;
  private readonly logger: PluginLogger;
  private lastCallMs = 0;
  /** Minimum gap between LLM calls, to avoid hammering the endpoint. */
  private readonly cooldownMs = 5000;
  /** FIX: hard cap on buffered messages — previously the buffer grew without bound while the cooldown (or a dead endpoint) kept deferring calls. */
  private readonly maxBuffered = 50;

  constructor(config: LlmConfig, logger: PluginLogger) {
    this.config = config;
    this.logger = logger;
  }

  /**
   * Buffer a message. Returns analysis when a full batch is flushed to the
   * LLM; null otherwise (disabled, still buffering, or within cooldown).
   */
  async addMessage(
    content: string,
    sender: string,
    role: "user" | "assistant",
  ): Promise<LlmAnalysis | null> {
    if (!this.config.enabled) return null;
    this.buffer.push({ role, content, sender });
    // Drop the oldest entries past the cap so memory stays bounded.
    if (this.buffer.length > this.maxBuffered) {
      this.buffer.splice(0, this.buffer.length - this.maxBuffered);
    }
    if (this.buffer.length < this.config.batchSize) return null;
    // Cooldown check — keep buffering instead of calling again too soon.
    const now = Date.now();
    if (now - this.lastCallMs < this.cooldownMs) return null;
    this.lastCallMs = now;
    // Flush buffer
    const batch = this.buffer.splice(0);
    return this.analyze(batch);
  }

  /**
   * Force-analyze the remaining buffer (e.g. before compaction).
   * Bypasses batchSize and cooldown; null when disabled or empty.
   */
  async flush(): Promise<LlmAnalysis | null> {
    if (!this.config.enabled || this.buffer.length === 0) return null;
    const batch = this.buffer.splice(0);
    return this.analyze(batch);
  }

  /** Send one batch to the LLM and parse the structured result (null on any failure). */
  private async analyze(
    messages: Array<{ role: string; content: string; sender: string }>,
  ): Promise<LlmAnalysis | null> {
    const snippet = messages
      .map((m) => `[${m.sender}]: ${m.content}`)
      .join("\n\n");
    const raw = await callLlm(
      this.config,
      [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: snippet },
      ],
      this.logger,
    );
    if (!raw) return null;
    const analysis = parseAnalysis(raw, this.logger);
    if (analysis) {
      const stats = `threads=${analysis.threads.length} decisions=${analysis.decisions.length} closures=${analysis.closures.length}`;
      this.logger.info(`[cortex-llm] Analysis: ${stats}`);
    }
    return analysis;
  }
}
/**
 * Resolve LLM config from plugin config.
 *
 * Fields with missing or wrongly-typed values fall back to LLM_DEFAULTS.
 * FIX: numeric fields must additionally be finite and >= 1 — the previous
 * `typeof === "number"` check accepted NaN / negative / Infinity, and a NaN
 * or negative timeoutMs breaks the HTTP timeout handling.
 */
export function resolveLlmConfig(raw?: Record<string, unknown>): LlmConfig {
  if (!raw) return { ...LLM_DEFAULTS };
  // Accept only finite numbers >= min; anything else uses the fallback.
  const num = (v: unknown, fallback: number, min: number): number =>
    typeof v === "number" && Number.isFinite(v) && v >= min ? v : fallback;
  return {
    enabled: typeof raw.enabled === "boolean" ? raw.enabled : LLM_DEFAULTS.enabled,
    endpoint: typeof raw.endpoint === "string" ? raw.endpoint : LLM_DEFAULTS.endpoint,
    model: typeof raw.model === "string" ? raw.model : LLM_DEFAULTS.model,
    apiKey: typeof raw.apiKey === "string" ? raw.apiKey : LLM_DEFAULTS.apiKey,
    timeoutMs: num(raw.timeoutMs, LLM_DEFAULTS.timeoutMs, 1),
    batchSize: num(raw.batchSize, LLM_DEFAULTS.batchSize, 1),
  };
}

View file

@ -32,13 +32,23 @@ const WAIT_PATTERNS_DE = [
];
const TOPIC_PATTERNS_EN = [
/(?:back to|now about|regarding)\s+(\w[\w\s-]{2,30})/i,
/(?:back to|now about|regarding|let's (?:talk|discuss|look at))\s+(?:the\s+)?(\w[\w\s-]{3,40})/i,
];
const TOPIC_PATTERNS_DE = [
/(?:zurück zu|jetzt zu|bzgl\.?|wegen)\s+(\w[\w\s-]{2,30})/i,
/(?:zurück zu|jetzt zu|bzgl\.?|wegen|lass uns (?:über|mal))\s+(?:dem?|die|das)?\s*(\w[\w\s-]{3,40})/i,
];
/** Words that should never be thread titles (noise filter) */
const TOPIC_BLACKLIST = new Set([
// English pronouns/demonstratives, then German articles/pronouns
"it", "that", "this", "the", "them", "what", "which", "there",
"das", "die", "der", "es", "was", "hier", "dort",
// Indefinite quantifiers (en / de)
"nothing", "something", "everything", "nichts", "etwas", "alles",
// Object pronouns (en / de)
"me", "you", "him", "her", "us", "mir", "dir", "ihm", "uns",
// Temporal words (en / de)
"today", "tomorrow", "yesterday", "heute", "morgen", "gestern",
// German filler/conjunction words
"noch", "schon", "jetzt", "dann", "also", "aber", "oder",
]);
const MOOD_PATTERNS: Record<Exclude<Mood, "neutral">, RegExp> = {
frustrated: /(?:fuck|shit|mist|nervig|genervt|damn|wtf|argh|schon wieder|zum kotzen|sucks)/i,
excited: /(?:geil|nice|awesome|krass|boom|läuft|yes!|🎯|🚀|perfekt|brilliant|mega|sick)/i,
@ -115,6 +125,24 @@ export function detectMood(text: string): Mood {
return lastMood;
}
/**
 * Check if a topic candidate is noise (too short, blacklisted, or garbage).
 *
 * All checks are independent predicates OR-ed together, so a candidate is
 * noise as soon as any single rule matches.
 */
export function isNoiseTopic(topic: string): boolean {
  const candidate = topic.trim();
  // Very short, overly long, or multiline strings are never usable titles.
  if (candidate.length < 4 || candidate.length > 60 || candidate.includes("\n")) {
    return true;
  }
  const tokens = candidate.toLowerCase().split(/\s+/);
  // A lone blacklisted word ("that", "nichts", ...) carries no topic info.
  if (tokens.length === 1 && TOPIC_BLACKLIST.has(tokens[0])) return true;
  // Likewise when every token is blacklisted or trivially short.
  if (tokens.every((t) => TOPIC_BLACKLIST.has(t) || t.length < 3)) return true;
  // Fragments that open with a pronoun read like sentences, not titles
  // (fixes garbage threads such as "nichts gepostet habe").
  return /^(ich|i|we|wir|du|er|sie|he|she|it|es|nichts|nothing|etwas|something)\s/i.test(candidate);
}
/** High-impact keywords for decision impact inference */
export const HIGH_IMPACT_KEYWORDS = [
"architecture", "architektur", "security", "sicherheit",

View file

@ -7,7 +7,7 @@ import type {
ThreadPriority,
PluginLogger,
} from "./types.js";
import { getPatterns, detectMood, HIGH_IMPACT_KEYWORDS } from "./patterns.js";
import { getPatterns, detectMood, HIGH_IMPACT_KEYWORDS, isNoiseTopic } from "./patterns.js";
import type { PatternLanguage } from "./patterns.js";
import { loadJson, saveJson, rebootDir, ensureRebootDir } from "./storage.js";
@ -127,9 +127,10 @@ export class ThreadTracker {
this.sessionMood = data.session_mood ?? "neutral";
}
/** Create new threads from topic signals. */
/** Create new threads from topic signals (with noise filtering). */
private createFromTopics(topics: string[], sender: string, mood: string, now: string): void {
for (const topic of topics) {
if (isNoiseTopic(topic)) continue;
const exists = this.threads.some(
t => t.title.toLowerCase() === topic.toLowerCase() || matchesThread(t, topic),
);
@ -143,6 +144,52 @@ export class ThreadTracker {
}
}
/**
* Apply LLM analysis results — creates new threads, closes completed ones,
* and updates the session mood. (Decisions from the same analysis are handled
* separately by DecisionTracker in the hooks, not here.)
* Called from hooks when LLM enhancement is enabled.
*
* @param analysis Structured LLM findings: candidate threads, closure titles, mood.
*/
applyLlmAnalysis(analysis: {
threads: Array<{ title: string; status: "open" | "closed"; summary?: string }>;
closures: string[];
mood: string;
}): void {
const now = new Date().toISOString();
// Create threads from LLM
for (const lt of analysis.threads) {
// Same noise filter as the regex path — LLM titles can be garbage too.
if (isNoiseTopic(lt.title)) continue;
// Dedupe against existing threads (exact title or fuzzy match).
const exists = this.threads.some(
t => t.title.toLowerCase() === lt.title.toLowerCase() || matchesThread(t, lt.title),
);
if (!exists) {
this.threads.push({
id: randomUUID(), title: lt.title, status: lt.status,
priority: inferPriority(lt.title), summary: lt.summary ?? "LLM-detected",
decisions: [], waiting_for: null, mood: analysis.mood ?? "neutral",
last_activity: now, created: now,
});
}
}
// Close threads from LLM closures (a closure may match several open threads)
for (const closure of analysis.closures) {
for (const thread of this.threads) {
if (thread.status === "open" && matchesThread(thread, closure)) {
thread.status = "closed";
thread.last_activity = now;
}
}
}
// Update session mood — "neutral" is the default, so only a non-neutral
// signal overwrites the current session mood.
if (analysis.mood && analysis.mood !== "neutral") {
this.sessionMood = analysis.mood;
}
// NOTE(review): marks dirty and persists even when nothing changed —
// presumably persist() is cheap; confirm before optimizing.
this.dirty = true;
this.persist();
}
/** Close threads matching closure signals. */
private closeMatching(content: string, closures: boolean[], now: string): void {
if (closures.length === 0) return;

View file

@ -245,6 +245,14 @@ export type CortexConfig = {
patterns: {
language: "en" | "de" | "both";
};
llm: {
enabled: boolean;
endpoint: string;
model: string;
apiKey: string;
timeoutMs: number;
batchSize: number;
};
};
// ============================================================

97
test/llm-enhance.test.ts Normal file
View file

@ -0,0 +1,97 @@
import { describe, it, expect } from "vitest";
import { resolveLlmConfig, LlmEnhancer, LLM_DEFAULTS } from "../src/llm-enhance.js";
// Silent logger stub — these tests never assert on log output.
const mockLogger = {
info: () => {},
warn: () => {},
error: () => {},
debug: () => {},
};
// Config resolution: defaults, partial merge, and type validation.
describe("resolveLlmConfig", () => {
it("returns defaults when no config provided", () => {
const config = resolveLlmConfig(undefined);
expect(config).toEqual(LLM_DEFAULTS);
expect(config.enabled).toBe(false);
});
it("returns defaults for empty object", () => {
const config = resolveLlmConfig({});
expect(config).toEqual(LLM_DEFAULTS);
});
it("merges partial config with defaults", () => {
const config = resolveLlmConfig({
enabled: true,
model: "qwen2.5:7b",
});
expect(config.enabled).toBe(true);
expect(config.model).toBe("qwen2.5:7b");
expect(config.endpoint).toBe(LLM_DEFAULTS.endpoint);
expect(config.timeoutMs).toBe(LLM_DEFAULTS.timeoutMs);
expect(config.batchSize).toBe(LLM_DEFAULTS.batchSize);
});
it("respects custom endpoint and apiKey", () => {
const config = resolveLlmConfig({
enabled: true,
endpoint: "https://api.openai.com/v1",
model: "gpt-4o-mini",
apiKey: "sk-test",
timeoutMs: 30000,
batchSize: 5,
});
expect(config.endpoint).toBe("https://api.openai.com/v1");
expect(config.apiKey).toBe("sk-test");
expect(config.timeoutMs).toBe(30000);
expect(config.batchSize).toBe(5);
});
it("ignores invalid types", () => {
const config = resolveLlmConfig({
enabled: "yes" as any,
model: 42 as any,
timeoutMs: "fast" as any,
});
expect(config.enabled).toBe(LLM_DEFAULTS.enabled);
expect(config.model).toBe(LLM_DEFAULTS.model);
expect(config.timeoutMs).toBe(LLM_DEFAULTS.timeoutMs);
});
});
// Buffering behavior and graceful degradation (no LLM server required).
describe("LlmEnhancer", () => {
it("returns null when disabled", async () => {
const enhancer = new LlmEnhancer({ ...LLM_DEFAULTS, enabled: false }, mockLogger);
const result = await enhancer.addMessage("test message", "user1", "user");
expect(result).toBeNull();
});
it("buffers messages until batchSize", async () => {
const enhancer = new LlmEnhancer(
{ ...LLM_DEFAULTS, enabled: true, batchSize: 3 },
mockLogger,
);
// First two messages should buffer (no LLM call)
const r1 = await enhancer.addMessage("hello", "user1", "user");
expect(r1).toBeNull();
const r2 = await enhancer.addMessage("world", "assistant", "assistant");
expect(r2).toBeNull();
// Third would trigger LLM but will fail gracefully (no server)
const r3 = await enhancer.addMessage("test", "user1", "user");
// Returns null because localhost:11434 is not guaranteed
// The important thing is it doesn't throw
expect(r3 === null || typeof r3 === "object").toBe(true);
});
it("flush returns null when no messages buffered", async () => {
const enhancer = new LlmEnhancer({ ...LLM_DEFAULTS, enabled: true }, mockLogger);
const result = await enhancer.flush();
expect(result).toBeNull();
});
it("flush returns null when disabled", async () => {
const enhancer = new LlmEnhancer({ ...LLM_DEFAULTS, enabled: false }, mockLogger);
const result = await enhancer.flush();
expect(result).toBeNull();
});
});

57
test/noise-filter.test.ts Normal file
View file

@ -0,0 +1,57 @@
import { describe, it, expect } from "vitest";
import { isNoiseTopic } from "../src/patterns.js";
// Unit tests for the isNoiseTopic() noise filter in src/patterns.ts:
// reject rules first, then accepted (real) topic names.
describe("isNoiseTopic", () => {
it("rejects short strings", () => {
expect(isNoiseTopic("foo")).toBe(true);
expect(isNoiseTopic("ab")).toBe(true);
expect(isNoiseTopic("")).toBe(true);
});
it("rejects single blacklisted words", () => {
expect(isNoiseTopic("that")).toBe(true);
expect(isNoiseTopic("this")).toBe(true);
expect(isNoiseTopic("nichts")).toBe(true);
expect(isNoiseTopic("alles")).toBe(true);
});
it("rejects all-blacklisted multi-word", () => {
expect(isNoiseTopic("das was es")).toBe(true);
expect(isNoiseTopic("the that it")).toBe(true);
});
it("rejects sentence fragments starting with pronouns", () => {
expect(isNoiseTopic("ich habe nichts gepostet")).toBe(true);
expect(isNoiseTopic("we should do something")).toBe(true);
expect(isNoiseTopic("er hat gesagt")).toBe(true);
expect(isNoiseTopic("I think maybe")).toBe(true);
});
it("rejects topics with newlines", () => {
expect(isNoiseTopic("line one\nline two")).toBe(true);
});
it("rejects topics longer than 60 chars", () => {
const long = "a".repeat(61);
expect(isNoiseTopic(long)).toBe(true);
});
it("accepts valid topic names", () => {
expect(isNoiseTopic("Auth Migration")).toBe(false);
expect(isNoiseTopic("Plugin-Repo Setup")).toBe(false);
expect(isNoiseTopic("NATS Event Store")).toBe(false);
expect(isNoiseTopic("Cortex Demo")).toBe(false);
expect(isNoiseTopic("Security Audit")).toBe(false);
expect(isNoiseTopic("Deployment Pipeline")).toBe(false);
});
it("accepts german topic names", () => {
expect(isNoiseTopic("Darkplex Analyse")).toBe(false);
expect(isNoiseTopic("Credential Rotation")).toBe(false);
expect(isNoiseTopic("Thread Tracking Qualität")).toBe(false);
});
// Regression test for the motivating real-world garbage thread.
it("rejects 'nichts gepostet habe' (real-world noise)", () => {
expect(isNoiseTopic("nichts gepostet habe")).toBe(true);
});
});

View file

@ -260,10 +260,10 @@ describe("topic patterns", () => {
expect(anyMatch(topic, "just a random sentence")).toBe(false);
});
it("limits captured topic to 30 chars", () => {
const topics = captureTopics(topic, "back to the very long topic name that exceeds thirty characters limit here");
it("limits captured topic to 40 chars", () => {
const topics = captureTopics(topic, "back to the very long topic name that exceeds forty characters limit here and keeps going");
if (topics.length > 0) {
expect(topics[0].length).toBeLessThanOrEqual(31);
expect(topics[0].length).toBeLessThanOrEqual(41);
}
});
});
@ -274,7 +274,7 @@ describe("topic patterns", () => {
it("captures topic after 'zurück zu'", () => {
const topics = captureTopics(topic, "Zurück zu der Auth-Migration");
expect(topics.length).toBeGreaterThan(0);
expect(topics[0]).toContain("der Auth-Migration");
expect(topics[0]).toContain("Auth-Migration");
});
it("captures topic after 'jetzt zu'", () => {