* fix(ui): allow relative URLs in avatar validation
The isAvatarUrl check only accepted http://, https://, or data: URLs,
but the /avatar/{agentId} endpoint returns relative paths like /avatar/main.
This caused local file avatars to display as text instead of images.
Fixes avatar display for locally configured avatar files.
* fix(gateway): resolve local avatars to URL in HTML injection and RPC
The frontend fix alone wasn't enough because:
1. serveIndexHtml() was injecting the raw avatar filename into HTML
2. agent.identity.get RPC was returning raw filename, overwriting the
HTML-injected value
Now both paths resolve local file avatars (*.png, *.jpg, etc.) to the
/avatar/{agentId} endpoint URL.
* feat(compaction): add adaptive chunk sizing and progressive fallback
- Add computeAdaptiveChunkRatio() to reduce chunk size for large messages
- Add isOversizedForSummary() to detect messages too large to summarize
- Add summarizeWithFallback() with progressive fallback:
- Tries full summarization first
- Falls back to partial summarization excluding oversized messages
- Notes oversized messages in the summary output
- Add SAFETY_MARGIN (1.2x) buffer for token estimation inaccuracy
- Reduce MIN_CHUNK_RATIO to 0.15 for very large messages
This prevents compaction failures when conversations contain
unusually large tool outputs or responses that exceed the
summarization model's context window.
* feat(ui): add compaction indicator and improve event error handling
Compaction indicator:
- Add CompactionStatus type and handleCompactionEvent() in app-tool-stream.ts
- Show '🧹 Compacting context...' toast while active (with pulse animation)
- Show '🧹 Context compacted' briefly after completion
- Auto-clear toast after 5 seconds
- Add CSS styles for .callout.info, .callout.success, .compaction-indicator
Error handling improvements:
- Wrap onEvent callback in try/catch in gateway.ts to prevent errors
from breaking the WebSocket message handler
- Wrap handleGatewayEvent in try/catch with console.error logging
to isolate errors and make them visible in devtools
These changes address UI freezes during heavy agent activity by:
1. Showing users when compaction is happening
2. Preventing uncaught errors from silently breaking the event loop
* fix(control-ui): add agentId to DEFAULT_ASSISTANT_IDENTITY
TypeScript inferred the union type without agentId when falling back to
DEFAULT_ASSISTANT_IDENTITY, causing build errors at control-ui.ts:222-223.
413 lines
13 KiB
TypeScript
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
|
import type { ExtensionAPI, ExtensionContext, FileOperations } from "@mariozechner/pi-coding-agent";
|
|
import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";
|
|
|
|
import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";
|
|
|
|
// Default fraction of the model context window used per summarization chunk.
const BASE_CHUNK_RATIO = 0.4;
// Floor for the adaptive chunk ratio when messages are very large.
const MIN_CHUNK_RATIO = 0.15;
const SAFETY_MARGIN = 1.2; // 20% buffer for estimateTokens() inaccuracy
// Summary used when no model/API key is available or summarization fails entirely.
const FALLBACK_SUMMARY =
  "Summary unavailable due to context limits. Older messages were truncated.";
// Instructions passed to the summarizer for the prefix half of a split turn.
const TURN_PREFIX_INSTRUCTIONS =
  "This summary covers the prefix of a split turn. Focus on the original request," +
  " early progress, and any details needed to understand the retained suffix.";
// At most this many tool failures are listed in the summary section.
const MAX_TOOL_FAILURES = 8;
// Each failure summary line is truncated to this many characters.
const MAX_TOOL_FAILURE_CHARS = 240;
|
|
|
|
/** One failed tool invocation, condensed for inclusion in a compaction summary. */
type ToolFailure = {
  // Unique id of the failing tool call; used to de-duplicate repeated results.
  toolCallId: string;
  // Tool name, or the generic label "tool" when missing/blank.
  toolName: string;
  // Whitespace-normalized, truncated failure text (or a "failed" placeholder).
  summary: string;
  // Optional "status=... exitCode=..." string extracted from result details.
  meta?: string;
};
|
|
|
|
function normalizeFailureText(text: string): string {
|
|
return text.replace(/\s+/g, " ").trim();
|
|
}
|
|
|
|
function truncateFailureText(text: string, maxChars: number): string {
|
|
if (text.length <= maxChars) return text;
|
|
return `${text.slice(0, Math.max(0, maxChars - 3))}...`;
|
|
}
|
|
|
|
function formatToolFailureMeta(details: unknown): string | undefined {
|
|
if (!details || typeof details !== "object") return undefined;
|
|
const record = details as Record<string, unknown>;
|
|
const status = typeof record.status === "string" ? record.status : undefined;
|
|
const exitCode =
|
|
typeof record.exitCode === "number" && Number.isFinite(record.exitCode)
|
|
? record.exitCode
|
|
: undefined;
|
|
const parts: string[] = [];
|
|
if (status) parts.push(`status=${status}`);
|
|
if (exitCode !== undefined) parts.push(`exitCode=${exitCode}`);
|
|
return parts.length > 0 ? parts.join(" ") : undefined;
|
|
}
|
|
|
|
function extractToolResultText(content: unknown): string {
|
|
if (!Array.isArray(content)) return "";
|
|
const parts: string[] = [];
|
|
for (const block of content) {
|
|
if (!block || typeof block !== "object") continue;
|
|
const rec = block as { type?: unknown; text?: unknown };
|
|
if (rec.type === "text" && typeof rec.text === "string") {
|
|
parts.push(rec.text);
|
|
}
|
|
}
|
|
return parts.join("\n");
|
|
}
|
|
|
|
function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
|
|
const failures: ToolFailure[] = [];
|
|
const seen = new Set<string>();
|
|
|
|
for (const message of messages) {
|
|
if (!message || typeof message !== "object") continue;
|
|
const role = (message as { role?: unknown }).role;
|
|
if (role !== "toolResult") continue;
|
|
const toolResult = message as {
|
|
toolCallId?: unknown;
|
|
toolName?: unknown;
|
|
content?: unknown;
|
|
details?: unknown;
|
|
isError?: unknown;
|
|
};
|
|
if (toolResult.isError !== true) continue;
|
|
const toolCallId = typeof toolResult.toolCallId === "string" ? toolResult.toolCallId : "";
|
|
if (!toolCallId || seen.has(toolCallId)) continue;
|
|
seen.add(toolCallId);
|
|
|
|
const toolName =
|
|
typeof toolResult.toolName === "string" && toolResult.toolName.trim()
|
|
? toolResult.toolName
|
|
: "tool";
|
|
const rawText = extractToolResultText(toolResult.content);
|
|
const meta = formatToolFailureMeta(toolResult.details);
|
|
const normalized = normalizeFailureText(rawText);
|
|
const summary = truncateFailureText(
|
|
normalized || (meta ? "failed" : "failed (no output)"),
|
|
MAX_TOOL_FAILURE_CHARS,
|
|
);
|
|
failures.push({ toolCallId, toolName, summary, meta });
|
|
}
|
|
|
|
return failures;
|
|
}
|
|
|
|
function formatToolFailuresSection(failures: ToolFailure[]): string {
|
|
if (failures.length === 0) return "";
|
|
const lines = failures.slice(0, MAX_TOOL_FAILURES).map((failure) => {
|
|
const meta = failure.meta ? ` (${failure.meta})` : "";
|
|
return `- ${failure.toolName}${meta}: ${failure.summary}`;
|
|
});
|
|
if (failures.length > MAX_TOOL_FAILURES) {
|
|
lines.push(`- ...and ${failures.length - MAX_TOOL_FAILURES} more`);
|
|
}
|
|
return `\n\n## Tool Failures\n${lines.join("\n")}`;
|
|
}
|
|
|
|
function computeFileLists(fileOps: FileOperations): {
|
|
readFiles: string[];
|
|
modifiedFiles: string[];
|
|
} {
|
|
const modified = new Set([...fileOps.edited, ...fileOps.written]);
|
|
const readFiles = [...fileOps.read].filter((f) => !modified.has(f)).sort();
|
|
const modifiedFiles = [...modified].sort();
|
|
return { readFiles, modifiedFiles };
|
|
}
|
|
|
|
function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string {
|
|
const sections: string[] = [];
|
|
if (readFiles.length > 0) {
|
|
sections.push(`<read-files>\n${readFiles.join("\n")}\n</read-files>`);
|
|
}
|
|
if (modifiedFiles.length > 0) {
|
|
sections.push(`<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`);
|
|
}
|
|
if (sections.length === 0) return "";
|
|
return `\n\n${sections.join("\n\n")}`;
|
|
}
|
|
|
|
function chunkMessages(messages: AgentMessage[], maxTokens: number): AgentMessage[][] {
|
|
if (messages.length === 0) return [];
|
|
|
|
const chunks: AgentMessage[][] = [];
|
|
let currentChunk: AgentMessage[] = [];
|
|
let currentTokens = 0;
|
|
|
|
for (const message of messages) {
|
|
const messageTokens = estimateTokens(message);
|
|
if (currentChunk.length > 0 && currentTokens + messageTokens > maxTokens) {
|
|
chunks.push(currentChunk);
|
|
currentChunk = [];
|
|
currentTokens = 0;
|
|
}
|
|
|
|
currentChunk.push(message);
|
|
currentTokens += messageTokens;
|
|
|
|
if (messageTokens > maxTokens) {
|
|
// Split oversized messages to avoid unbounded chunk growth.
|
|
chunks.push(currentChunk);
|
|
currentChunk = [];
|
|
currentTokens = 0;
|
|
}
|
|
}
|
|
|
|
if (currentChunk.length > 0) {
|
|
chunks.push(currentChunk);
|
|
}
|
|
|
|
return chunks;
|
|
}
|
|
|
|
/**
|
|
* Compute adaptive chunk ratio based on average message size.
|
|
* When messages are large, we use smaller chunks to avoid exceeding model limits.
|
|
*/
|
|
function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number {
|
|
if (messages.length === 0) return BASE_CHUNK_RATIO;
|
|
|
|
const totalTokens = messages.reduce((sum, m) => sum + estimateTokens(m), 0);
|
|
const avgTokens = totalTokens / messages.length;
|
|
|
|
// Apply safety margin to account for estimation inaccuracy
|
|
const safeAvgTokens = avgTokens * SAFETY_MARGIN;
|
|
const avgRatio = safeAvgTokens / contextWindow;
|
|
|
|
// If average message is > 10% of context, reduce chunk ratio
|
|
if (avgRatio > 0.1) {
|
|
const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO);
|
|
return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
|
|
}
|
|
|
|
return BASE_CHUNK_RATIO;
|
|
}
|
|
|
|
/**
|
|
* Check if a single message is too large to summarize.
|
|
* If single message > 50% of context, it can't be summarized safely.
|
|
*/
|
|
function isOversizedForSummary(msg: AgentMessage, contextWindow: number): boolean {
|
|
const tokens = estimateTokens(msg) * SAFETY_MARGIN;
|
|
return tokens > contextWindow * 0.5;
|
|
}
|
|
|
|
async function summarizeChunks(params: {
|
|
messages: AgentMessage[];
|
|
model: NonNullable<ExtensionContext["model"]>;
|
|
apiKey: string;
|
|
signal: AbortSignal;
|
|
reserveTokens: number;
|
|
maxChunkTokens: number;
|
|
customInstructions?: string;
|
|
previousSummary?: string;
|
|
}): Promise<string> {
|
|
if (params.messages.length === 0) {
|
|
return params.previousSummary ?? "No prior history.";
|
|
}
|
|
|
|
const chunks = chunkMessages(params.messages, params.maxChunkTokens);
|
|
let summary = params.previousSummary;
|
|
|
|
for (const chunk of chunks) {
|
|
summary = await generateSummary(
|
|
chunk,
|
|
params.model,
|
|
params.reserveTokens,
|
|
params.apiKey,
|
|
params.signal,
|
|
params.customInstructions,
|
|
summary,
|
|
);
|
|
}
|
|
|
|
return summary ?? "No prior history.";
|
|
}
|
|
|
|
/**
 * Summarize with progressive fallback for handling oversized messages.
 *
 * Three tiers, each tried only when the previous one throws:
 *   1. Full summarization of all messages via summarizeChunks().
 *   2. Partial summarization of only the non-oversized messages, with a
 *      bracketed note appended for each oversized message that was omitted.
 *   3. A plain-text note stating how many messages (and how many oversized)
 *      could not be summarized.
 *
 * @param params.contextWindow Model context window in tokens; drives the
 *   oversized check (see isOversizedForSummary).
 * @param params.previousSummary Prior rolling summary, threaded into
 *   summarizeChunks and returned as-is when there are no messages.
 * @returns The best summary string the fallback chain could produce; this
 *   function does not throw for summarization failures (they are logged).
 */
async function summarizeWithFallback(params: {
  messages: AgentMessage[];
  model: NonNullable<ExtensionContext["model"]>;
  apiKey: string;
  signal: AbortSignal;
  reserveTokens: number;
  maxChunkTokens: number;
  contextWindow: number;
  customInstructions?: string;
  previousSummary?: string;
}): Promise<string> {
  const { messages, contextWindow } = params;

  if (messages.length === 0) {
    return params.previousSummary ?? "No prior history.";
  }

  // Tier 1: try full summarization first; on failure fall through to tier 2.
  try {
    return await summarizeChunks(params);
  } catch (fullError) {
    console.warn(
      `Full summarization failed, trying partial: ${
        fullError instanceof Error ? fullError.message : String(fullError)
      }`,
    );
  }

  // Tier 2 prep: split messages into summarizable ones and oversized ones,
  // recording a human-readable omission note per oversized message.
  const smallMessages: AgentMessage[] = [];
  const oversizedNotes: string[] = [];

  for (const msg of messages) {
    if (isOversizedForSummary(msg, contextWindow)) {
      const role = (msg as { role?: string }).role ?? "message";
      const tokens = estimateTokens(msg);
      oversizedNotes.push(
        `[Large ${role} (~${Math.round(tokens / 1000)}K tokens) omitted from summary]`,
      );
    } else {
      smallMessages.push(msg);
    }
  }

  // Tier 2: summarize only the small messages, then append the omission notes.
  if (smallMessages.length > 0) {
    try {
      const partialSummary = await summarizeChunks({
        ...params,
        messages: smallMessages,
      });
      const notes = oversizedNotes.length > 0 ? `\n\n${oversizedNotes.join("\n")}` : "";
      return partialSummary + notes;
    } catch (partialError) {
      console.warn(
        `Partial summarization also failed: ${
          partialError instanceof Error ? partialError.message : String(partialError)
        }`,
      );
    }
  }

  // Tier 3: nothing could be summarized — just describe what was there.
  return (
    `Context contained ${messages.length} messages (${oversizedNotes.length} oversized). ` +
    `Summary unavailable due to size limits.`
  );
}
|
|
|
|
/**
 * Extension entry point: registers a session_before_compact handler that
 * builds the compaction summary. The summary always carries tool-failure and
 * file-operation sections; the history portion is produced by
 * summarizeWithFallback when a model and API key are available, and by a
 * static fallback string otherwise (or when summarization throws).
 */
export default function compactionSafeguardExtension(api: ExtensionAPI): void {
  api.on("session_before_compact", async (event, ctx) => {
    const { preparation, customInstructions, signal } = event;

    // Sections appended to every summary, including all fallback paths.
    const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
    const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
    const toolFailures = collectToolFailures([
      ...preparation.messagesToSummarize,
      ...preparation.turnPrefixMessages,
    ]);
    const toolFailureSection = formatToolFailuresSection(toolFailures);
    const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;

    // No model configured: return the static fallback summary.
    const model = ctx.model;
    if (!model) {
      return {
        compaction: {
          summary: fallbackSummary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    }

    // No API key for the model: same static fallback.
    const apiKey = await ctx.modelRegistry.getApiKey(model);
    if (!apiKey) {
      return {
        compaction: {
          summary: fallbackSummary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    }

    try {
      const contextWindowTokens = Math.max(
        1,
        Math.floor(model.contextWindow ?? DEFAULT_CONTEXT_TOKENS),
      );

      // Use adaptive chunk ratio based on message sizes; both history and
      // turn-prefix messages factor into the average.
      const allMessages = [...preparation.messagesToSummarize, ...preparation.turnPrefixMessages];
      const adaptiveRatio = computeAdaptiveChunkRatio(allMessages, contextWindowTokens);
      const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio));
      const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens));

      // Summarize the history being compacted away.
      const historySummary = await summarizeWithFallback({
        messages: preparation.messagesToSummarize,
        model,
        apiKey,
        signal,
        reserveTokens,
        maxChunkTokens,
        contextWindow: contextWindowTokens,
        customInstructions,
        previousSummary: preparation.previousSummary,
      });

      // For a split turn, separately summarize the turn prefix (with its own
      // instructions) and append it under a divider.
      let summary = historySummary;
      if (preparation.isSplitTurn && preparation.turnPrefixMessages.length > 0) {
        const prefixSummary = await summarizeWithFallback({
          messages: preparation.turnPrefixMessages,
          model,
          apiKey,
          signal,
          reserveTokens,
          maxChunkTokens,
          contextWindow: contextWindowTokens,
          customInstructions: TURN_PREFIX_INSTRUCTIONS,
        });
        summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
      }

      summary += toolFailureSection;
      summary += fileOpsSummary;

      return {
        compaction: {
          summary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    } catch (error) {
      // Summarization failed outright; log and fall back to truncation notice.
      console.warn(
        `Compaction summarization failed; truncating history: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return {
        compaction: {
          summary: fallbackSummary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    }
  });
}
|
|
|
|
// Internal helpers and tuning constants exposed for unit tests only;
// not part of the extension's public API.
export const __testing = {
  collectToolFailures,
  formatToolFailuresSection,
  computeAdaptiveChunkRatio,
  isOversizedForSummary,
  BASE_CHUNK_RATIO,
  MIN_CHUNK_RATIO,
  SAFETY_MARGIN,
} as const;
|