openclaw-vainplex/src/agents/pi-extensions/compaction-safeguard.ts
Dave Lauer d03c404cb4
feat(compaction): add adaptive chunk sizing, progressive fallback, and UI indicator (#1466)
* fix(ui): allow relative URLs in avatar validation

The isAvatarUrl check only accepted http://, https://, or data: URLs,
but the /avatar/{agentId} endpoint returns relative paths like /avatar/main.
This caused local file avatars to display as text instead of images.

Fixes avatar display for locally configured avatar files.

* fix(gateway): resolve local avatars to URL in HTML injection and RPC

The frontend fix alone wasn't enough because:
1. serveIndexHtml() was injecting the raw avatar filename into HTML
2. agent.identity.get RPC was returning raw filename, overwriting the
   HTML-injected value

Now both paths resolve local file avatars (*.png, *.jpg, etc.) to the
/avatar/{agentId} endpoint URL.

* feat(compaction): add adaptive chunk sizing and progressive fallback

- Add computeAdaptiveChunkRatio() to reduce chunk size for large messages
- Add isOversizedForSummary() to detect messages too large to summarize
- Add summarizeWithFallback() with progressive fallback:
  - Tries full summarization first
  - Falls back to partial summarization excluding oversized messages
  - Notes oversized messages in the summary output
- Add SAFETY_MARGIN (1.2x) buffer for token estimation inaccuracy
- Reduce MIN_CHUNK_RATIO to 0.15 for very large messages

This prevents compaction failures when conversations contain
unusually large tool outputs or responses that exceed the
summarization model's context window.

* feat(ui): add compaction indicator and improve event error handling

Compaction indicator:
- Add CompactionStatus type and handleCompactionEvent() in app-tool-stream.ts
- Show '🧹 Compacting context...' toast while active (with pulse animation)
- Show '🧹 Context compacted' briefly after completion
- Auto-clear toast after 5 seconds
- Add CSS styles for .callout.info, .callout.success, .compaction-indicator

Error handling improvements:
- Wrap onEvent callback in try/catch in gateway.ts to prevent errors
  from breaking the WebSocket message handler
- Wrap handleGatewayEvent in try/catch with console.error logging
  to isolate errors and make them visible in devtools

These changes address UI freezes during heavy agent activity by:
1. Showing users when compaction is happening
2. Preventing uncaught errors from silently breaking the event loop

* fix(control-ui): add agentId to DEFAULT_ASSISTANT_IDENTITY

TypeScript inferred the union type without agentId when falling back to
DEFAULT_ASSISTANT_IDENTITY, causing build errors at control-ui.ts:222-223.
2026-01-23 06:32:30 +00:00

413 lines
13 KiB
TypeScript

import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ExtensionAPI, ExtensionContext, FileOperations } from "@mariozechner/pi-coding-agent";
import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";
import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";
// Default fraction of the model context window used per summarization chunk.
const BASE_CHUNK_RATIO = 0.4;
// Floor for the adaptive chunk ratio when messages are very large
// (see computeAdaptiveChunkRatio).
const MIN_CHUNK_RATIO = 0.15;
const SAFETY_MARGIN = 1.2; // 20% buffer for estimateTokens() inaccuracy
// Summary used when no model/API key is available or summarization fails.
const FALLBACK_SUMMARY =
"Summary unavailable due to context limits. Older messages were truncated.";
// Extra instructions when summarizing the dropped prefix of a split turn.
const TURN_PREFIX_INSTRUCTIONS =
"This summary covers the prefix of a split turn. Focus on the original request," +
" early progress, and any details needed to understand the retained suffix.";
// Cap on how many tool failures are listed in the summary section.
const MAX_TOOL_FAILURES = 8;
// Per-failure character budget for the one-line failure summary.
const MAX_TOOL_FAILURE_CHARS = 240;
// One failed tool call, condensed for inclusion in the compaction summary.
type ToolFailure = {
toolCallId: string;
toolName: string;
summary: string;
meta?: string;
};
/** Collapse every run of whitespace to a single space and trim the ends. */
function normalizeFailureText(text: string): string {
  const collapsed = text.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Truncate `text` to at most `maxChars` characters, appending "..." when cut.
 *
 * Fix: for `maxChars < 3` the original returned the literal "..." (3 chars),
 * which exceeded the requested limit. The result now never exceeds `maxChars`.
 */
function truncateFailureText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  // Too small to fit an ellipsis: hard-cut instead (handles 0 and negatives).
  if (maxChars < 3) return text.slice(0, Math.max(0, maxChars));
  return `${text.slice(0, maxChars - 3)}...`;
}
/**
 * Build a compact "status=... exitCode=..." tag from a tool-result details
 * object, or undefined when nothing usable is present.
 */
function formatToolFailureMeta(details: unknown): string | undefined {
  if (typeof details !== "object" || details === null) return undefined;
  const rec = details as Record<string, unknown>;
  const pieces: string[] = [];
  const status = rec.status;
  // Empty-string status is deliberately skipped (falsy check, as before).
  if (typeof status === "string" && status) pieces.push(`status=${status}`);
  const exitCode = rec.exitCode;
  if (typeof exitCode === "number" && Number.isFinite(exitCode)) {
    pieces.push(`exitCode=${exitCode}`);
  }
  return pieces.length ? pieces.join(" ") : undefined;
}
/** Join the text of all `{type:"text"}` blocks in a tool-result content array. */
function extractToolResultText(content: unknown): string {
  if (!Array.isArray(content)) return "";
  const texts = content
    .filter(
      (block): block is { type: "text"; text: string } =>
        typeof block === "object" &&
        block !== null &&
        (block as { type?: unknown }).type === "text" &&
        typeof (block as { text?: unknown }).text === "string",
    )
    .map((block) => block.text);
  return texts.join("\n");
}
/**
 * Scan messages for failed tool results and condense each unique one
 * (deduplicated by toolCallId) into a ToolFailure entry.
 */
function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
  const seenCallIds = new Set<string>();
  const out: ToolFailure[] = [];
  for (const msg of messages) {
    if (!msg || typeof msg !== "object") continue;
    const candidate = msg as {
      role?: unknown;
      toolCallId?: unknown;
      toolName?: unknown;
      content?: unknown;
      details?: unknown;
      isError?: unknown;
    };
    if (candidate.role !== "toolResult" || candidate.isError !== true) continue;
    const id = typeof candidate.toolCallId === "string" ? candidate.toolCallId : "";
    if (!id || seenCallIds.has(id)) continue;
    seenCallIds.add(id);
    const name =
      typeof candidate.toolName === "string" && candidate.toolName.trim()
        ? candidate.toolName
        : "tool";
    const meta = formatToolFailureMeta(candidate.details);
    const text = normalizeFailureText(extractToolResultText(candidate.content));
    out.push({
      toolCallId: id,
      toolName: name,
      // Fall back to a placeholder when the tool produced no text output.
      summary: truncateFailureText(
        text || (meta ? "failed" : "failed (no output)"),
        MAX_TOOL_FAILURE_CHARS,
      ),
      meta,
    });
  }
  return out;
}
/** Render collected tool failures as a markdown section, capped at MAX_TOOL_FAILURES. */
function formatToolFailuresSection(failures: ToolFailure[]): string {
  if (!failures.length) return "";
  const bullets = failures.slice(0, MAX_TOOL_FAILURES).map(({ toolName, meta, summary }) => {
    const suffix = meta ? ` (${meta})` : "";
    return `- ${toolName}${suffix}: ${summary}`;
  });
  const hidden = failures.length - MAX_TOOL_FAILURES;
  if (hidden > 0) {
    bullets.push(`- ...and ${hidden} more`);
  }
  return `\n\n## Tool Failures\n${bullets.join("\n")}`;
}
/**
 * Split tracked file operations into sorted read-only and modified lists.
 * A file that was both read and edited/written counts only as modified.
 */
function computeFileLists(fileOps: FileOperations): {
  readFiles: string[];
  modifiedFiles: string[];
} {
  const touched = new Set<string>();
  for (const f of fileOps.edited) touched.add(f);
  for (const f of fileOps.written) touched.add(f);
  const readFiles = [...fileOps.read].filter((f) => !touched.has(f));
  readFiles.sort();
  const modifiedFiles = Array.from(touched).sort();
  return { readFiles, modifiedFiles };
}
/** Render read/modified file lists as tagged sections, or "" when both are empty. */
function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string {
  const blocks: string[] = [];
  if (readFiles.length) {
    blocks.push(`<read-files>\n${readFiles.join("\n")}\n</read-files>`);
  }
  if (modifiedFiles.length) {
    blocks.push(`<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`);
  }
  return blocks.length ? `\n\n${blocks.join("\n\n")}` : "";
}
/**
 * Greedily group messages into chunks whose estimated token total stays
 * under maxTokens. A single message larger than maxTokens becomes (or ends)
 * its own chunk so chunk growth stays bounded.
 */
function chunkMessages(messages: AgentMessage[], maxTokens: number): AgentMessage[][] {
  const chunks: AgentMessage[][] = [];
  let bucket: AgentMessage[] = [];
  let bucketTokens = 0;
  const flush = () => {
    if (bucket.length) {
      chunks.push(bucket);
      bucket = [];
      bucketTokens = 0;
    }
  };
  for (const message of messages) {
    const tokens = estimateTokens(message);
    // Close the current chunk before this message would overflow it.
    if (bucketTokens + tokens > maxTokens) flush();
    bucket.push(message);
    bucketTokens += tokens;
    // An oversized message is sealed off immediately.
    if (tokens > maxTokens) flush();
  }
  flush();
  return chunks;
}
/**
 * Pick a chunk ratio suited to the average message size.
 * Large average messages shrink the ratio so individual chunks are less
 * likely to exceed the summarization model's limits.
 */
function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number {
  if (!messages.length) return BASE_CHUNK_RATIO;
  let total = 0;
  for (const m of messages) total += estimateTokens(m);
  // Pad the average to absorb estimateTokens() inaccuracy.
  const paddedAvg = (total / messages.length) * SAFETY_MARGIN;
  const avgRatio = paddedAvg / contextWindow;
  // Small messages keep the default ratio.
  if (avgRatio <= 0.1) return BASE_CHUNK_RATIO;
  const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO);
  return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
}
/**
 * Whether a single message is too large to summarize safely: its padded
 * token estimate exceeds half the context window.
 */
function isOversizedForSummary(msg: AgentMessage, contextWindow: number): boolean {
  const padded = estimateTokens(msg) * SAFETY_MARGIN;
  return padded > 0.5 * contextWindow;
}
/**
 * Summarize messages chunk by chunk, threading the rolling summary through
 * each generateSummary call so later chunks build on earlier ones.
 */
async function summarizeChunks(params: {
  messages: AgentMessage[];
  model: NonNullable<ExtensionContext["model"]>;
  apiKey: string;
  signal: AbortSignal;
  reserveTokens: number;
  maxChunkTokens: number;
  customInstructions?: string;
  previousSummary?: string;
}): Promise<string> {
  const { messages, previousSummary } = params;
  if (!messages.length) {
    return previousSummary ?? "No prior history.";
  }
  let rolling = previousSummary;
  for (const chunk of chunkMessages(messages, params.maxChunkTokens)) {
    rolling = await generateSummary(
      chunk,
      params.model,
      params.reserveTokens,
      params.apiKey,
      params.signal,
      params.customInstructions,
      rolling,
    );
  }
  return rolling ?? "No prior history.";
}
/**
 * Summarize with progressive fallback for handling oversized messages.
 * Order of attempts: (1) full summarization, (2) partial summarization that
 * omits oversized messages but notes them, (3) a plain count of what was lost.
 */
async function summarizeWithFallback(params: {
  messages: AgentMessage[];
  model: NonNullable<ExtensionContext["model"]>;
  apiKey: string;
  signal: AbortSignal;
  reserveTokens: number;
  maxChunkTokens: number;
  contextWindow: number;
  customInstructions?: string;
  previousSummary?: string;
}): Promise<string> {
  const { messages, contextWindow } = params;
  if (messages.length === 0) {
    return params.previousSummary ?? "No prior history.";
  }

  // Attempt 1: summarize everything.
  try {
    return await summarizeChunks(params);
  } catch (fullError) {
    const detail = fullError instanceof Error ? fullError.message : String(fullError);
    console.warn(`Full summarization failed, trying partial: ${detail}`);
  }

  // Partition into summarizable messages and notes about omitted ones.
  const keep: AgentMessage[] = [];
  const omittedNotes: string[] = [];
  for (const msg of messages) {
    if (!isOversizedForSummary(msg, contextWindow)) {
      keep.push(msg);
      continue;
    }
    const role = (msg as { role?: string }).role ?? "message";
    const approxK = Math.round(estimateTokens(msg) / 1000);
    omittedNotes.push(`[Large ${role} (~${approxK}K tokens) omitted from summary]`);
  }

  // Attempt 2: summarize only the small messages.
  if (keep.length > 0) {
    try {
      const partial = await summarizeChunks({ ...params, messages: keep });
      return omittedNotes.length > 0 ? `${partial}\n\n${omittedNotes.join("\n")}` : partial;
    } catch (partialError) {
      const detail = partialError instanceof Error ? partialError.message : String(partialError);
      console.warn(`Partial summarization also failed: ${detail}`);
    }
  }

  // Attempt 3: report what was there without summarizing.
  return (
    `Context contained ${messages.length} messages (${omittedNotes.length} oversized). ` +
    `Summary unavailable due to size limits.`
  );
}
/**
 * Extension that guards compaction against summarization failures.
 *
 * On `session_before_compact` it summarizes the history being dropped,
 * appends a tool-failure digest and read/modified file lists, and falls
 * back to a static truncation notice when no model or API key is available
 * or summarization throws — so compaction always yields a usable result.
 */
export default function compactionSafeguardExtension(api: ExtensionAPI): void {
  api.on("session_before_compact", async (event, ctx) => {
    const { preparation, customInstructions, signal } = event;

    // Computed up front: these sections are appended to every summary,
    // including the fallback.
    const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
    const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
    const toolFailureSection = formatToolFailuresSection(
      collectToolFailures([
        ...preparation.messagesToSummarize,
        ...preparation.turnPrefixMessages,
      ]),
    );
    const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;

    // Every exit path returns the same compaction envelope; only the
    // summary text differs. (Previously this literal was duplicated four times.)
    const buildCompaction = (summary: string) => ({
      compaction: {
        summary,
        firstKeptEntryId: preparation.firstKeptEntryId,
        tokensBefore: preparation.tokensBefore,
        details: { readFiles, modifiedFiles },
      },
    });

    const model = ctx.model;
    if (!model) return buildCompaction(fallbackSummary);

    const apiKey = await ctx.modelRegistry.getApiKey(model);
    if (!apiKey) return buildCompaction(fallbackSummary);

    try {
      const contextWindowTokens = Math.max(
        1,
        Math.floor(model.contextWindow ?? DEFAULT_CONTEXT_TOKENS),
      );
      // Chunk size adapts to message sizes so oversized histories don't
      // overflow the summarization model's context window.
      const allMessages = [...preparation.messagesToSummarize, ...preparation.turnPrefixMessages];
      const adaptiveRatio = computeAdaptiveChunkRatio(allMessages, contextWindowTokens);
      const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio));
      const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens));

      const historySummary = await summarizeWithFallback({
        messages: preparation.messagesToSummarize,
        model,
        apiKey,
        signal,
        reserveTokens,
        maxChunkTokens,
        contextWindow: contextWindowTokens,
        customInstructions,
        previousSummary: preparation.previousSummary,
      });

      let summary = historySummary;
      // A split turn keeps its suffix in context; summarize the dropped
      // prefix separately so the retained suffix stays understandable.
      if (preparation.isSplitTurn && preparation.turnPrefixMessages.length > 0) {
        const prefixSummary = await summarizeWithFallback({
          messages: preparation.turnPrefixMessages,
          model,
          apiKey,
          signal,
          reserveTokens,
          maxChunkTokens,
          contextWindow: contextWindowTokens,
          customInstructions: TURN_PREFIX_INSTRUCTIONS,
        });
        summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
      }
      summary += toolFailureSection;
      summary += fileOpsSummary;
      return buildCompaction(summary);
    } catch (error) {
      console.warn(
        `Compaction summarization failed; truncating history: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return buildCompaction(fallbackSummary);
    }
  });
}
// Internal symbols exposed solely for unit tests; not part of the public API.
export const __testing = {
collectToolFailures,
formatToolFailuresSection,
computeAdaptiveChunkRatio,
isOversizedForSummary,
BASE_CHUNK_RATIO,
MIN_CHUNK_RATIO,
SAFETY_MARGIN,
} as const;