feat: knowledge-engine v0.1.0 — all Cerberus findings fixed
- 83/83 tests passing (was 32/45) - New: src/http-client.ts (shared HTTP/HTTPS client, fixes C2+H1) - Fixed: proper_noun regex exclusions (C6) - Fixed: shutdown hooks registered in hooks.ts (C3) - Fixed: all timers use .unref() (H6) - Fixed: resolveConfig split into smaller functions (C4) - Fixed: extract() split with processMatch helper (C5) - Fixed: FactStore.addFact isLoaded guard (H3) - Fixed: validateConfig split (H2) - Fixed: type-safe config merge, removed as any (H4) - Added: http-client tests, expanded coverage (H5) - Fixed: LLM batch await (S1), fresh RegExp per call (S2) - 1530 LOC source, 1298 LOC tests, strict TypeScript
This commit is contained in:
commit
8964d93c60
28 changed files with 5092 additions and 0 deletions
17
.gitignore
vendored
Normal file
17
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Node.js
|
||||
node_modules/
|
||||
dist/
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
|
||||
# TypeScript
|
||||
*.tsbuildinfo
|
||||
|
||||
# Test
|
||||
coverage/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
374
ARCHITECTURE.md
Normal file
374
ARCHITECTURE.md
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
# Architecture: @vainplex/openclaw-knowledge-engine
|
||||
|
||||
## 1. Overview and Scope
|
||||
|
||||
`@vainplex/openclaw-knowledge-engine` is a TypeScript-based OpenClaw plugin for real-time and batch knowledge extraction from conversational data. It replaces a collection of legacy Python scripts with a unified, modern, and tightly integrated solution.
|
||||
|
||||
The primary goal of this plugin is to identify, extract, and store key information (entities and facts) from user and agent messages. This knowledge is then made available for long-term memory, context enrichment, and improved agent performance. It operates directly within the OpenClaw event pipeline, eliminating the need for external NATS consumers and schedulers.
|
||||
|
||||
### 1.1. Core Features
|
||||
|
||||
- **Hybrid Entity Extraction:** Combines high-speed, low-cost regex extraction with optional, high-fidelity LLM-based extraction.
|
||||
- **Structured Fact Store:** Manages a durable store of facts with metadata, relevance scoring, and a temporal decay mechanism.
|
||||
- **Seamless Integration:** Hooks directly into OpenClaw's lifecycle events (`message_received`, `message_sent`, `session_start`).
|
||||
- **Configurable & Maintainable:** All features are configurable via a JSON schema, and the TypeScript codebase ensures type safety and maintainability.
|
||||
- **Zero Runtime Dependencies:** Relies only on Node.js built-in APIs, mirroring the pattern of `@vainplex/openclaw-cortex`.
|
||||
- **Optional Embeddings:** Can integrate with ChromaDB for semantic search over extracted facts.
|
||||
|
||||
### 1.2. Out of Scope
|
||||
|
||||
- **TypeDB Integration:** The legacy TypeDB dependency is explicitly removed and will not be supported.
|
||||
- **Direct NATS Consumption:** The plugin relies on OpenClaw hooks, not direct interaction with NATS streams.
|
||||
- **UI/Frontend:** This plugin is purely a backend data processing engine.
|
||||
|
||||
---
|
||||
|
||||
## 2. Module Breakdown
|
||||
|
||||
The plugin will be structured similarly to `@vainplex/openclaw-cortex`, with a clear separation of concerns between modules. All source code will reside in the `src/` directory.
|
||||
|
||||
| File | Responsibility |
|
||||
| --------------------- | -------------------------------------------------------------------------------------------------------------- |
|
||||
| `index.ts` | Plugin entry point. Registers hooks, commands, and performs initial configuration validation. |
|
||||
| `src/hooks.ts` | Main integration logic. Registers and orchestrates all OpenClaw hook handlers. Manages shared state. |
|
||||
| `src/types.ts` | Centralized TypeScript type definitions for configuration, entities, facts, and API interfaces. |
|
||||
| `src/config.ts` | Provides functions for resolving and validating the plugin's configuration from `openclaw.plugin.json`. |
|
||||
| `src/storage.ts` | Low-level file I/O utilities for reading/writing JSON files, ensuring atomic writes and handling debouncing. |
|
||||
| `src/entity-extractor.ts`| Implements the entity extraction pipeline. Contains the `EntityExtractor` class. |
|
||||
| `src/fact-store.ts` | Implements the fact storage and retrieval logic. Contains the `FactStore` class, including decay logic. |
|
||||
| `src/llm-enhancer.ts` | Handles communication with an external LLM (e.g., Ollama) for batched, deep extraction of entities and facts. |
|
||||
| `src/embeddings.ts` | Manages optional integration with ChromaDB, including batching and syncing embeddings. |
|
||||
| `src/maintenance.ts` | Encapsulates background tasks like fact decay and embeddings sync, triggered by an internal timer. |
|
||||
| `src/patterns.ts` | Stores default regex patterns for common entities (dates, names, locations, etc.). |
|
||||
|
||||
---
|
||||
|
||||
## 3. Type Definitions
|
||||
|
||||
Located in `src/types.ts`.
|
||||
|
||||
```typescript
|
||||
// src/types.ts
|
||||
|
||||
/**
|
||||
* The public API exposed by the OpenClaw host to the plugin.
|
||||
*/
|
||||
export interface OpenClawPluginApi {
|
||||
pluginConfig: Record<string, unknown>;
|
||||
logger: {
|
||||
info: (msg: string) => void;
|
||||
warn: (msg: string) => void;
|
||||
error: (msg: string) => void;
|
||||
};
|
||||
on: (event: string, handler: (event: HookEvent, ctx: HookContext) => void, options?: { priority: number }) => void;
|
||||
}
|
||||
|
||||
export interface HookEvent {
|
||||
content?: string;
|
||||
message?: string;
|
||||
text?: string;
|
||||
from?: string;
|
||||
sender?: string;
|
||||
role?: "user" | "assistant";
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
export interface HookContext {
|
||||
workspace: string; // Absolute path to the OpenClaw workspace
|
||||
}
|
||||
|
||||
/**
|
||||
* Plugin configuration schema, validated from openclaw.plugin.json.
|
||||
*/
|
||||
export interface KnowledgeConfig {
|
||||
enabled: boolean;
|
||||
workspace: string;
|
||||
extraction: {
|
||||
regex: {
|
||||
enabled: boolean;
|
||||
};
|
||||
llm: {
|
||||
enabled: boolean;
|
||||
model: string;
|
||||
endpoint: string;
|
||||
batchSize: number;
|
||||
cooldownMs: number;
|
||||
};
|
||||
};
|
||||
decay: {
|
||||
enabled: boolean;
|
||||
intervalHours: number;
|
||||
rate: number; // e.g., 0.05 for 5% decay per interval
|
||||
};
|
||||
embeddings: {
|
||||
enabled: boolean;
|
||||
endpoint: string;
|
||||
syncIntervalMinutes: number;
|
||||
collectionName: string;
|
||||
};
|
||||
storage: {
|
||||
maxEntities: number;
|
||||
maxFacts: number;
|
||||
writeDebounceMs: number;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents an extracted entity.
|
||||
*/
|
||||
export interface Entity {
|
||||
id: string; // e.g., "person:claude"
|
||||
type: "person" | "location" | "organization" | "date" | "product" | "concept" | "unknown";
|
||||
value: string; // The canonical value, e.g., "Claude"
|
||||
mentions: string[]; // Different ways it was mentioned, e.g., ["claude", "Claude's"]
|
||||
count: number;
|
||||
importance: number; // 0.0 to 1.0
|
||||
lastSeen: string; // ISO 8601 timestamp
|
||||
source: ("regex" | "llm")[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a structured fact.
|
||||
*/
|
||||
export interface Fact {
|
||||
id: string; // UUID v4
|
||||
subject: string; // Entity ID
|
||||
predicate: string; // e.g., "is-a", "has-property", "works-at"
|
||||
object: string; // Entity ID or literal value
|
||||
relevance: number; // 0.0 to 1.0, subject to decay
|
||||
createdAt: string; // ISO 8601 timestamp
|
||||
lastAccessed: string; // ISO 8601 timestamp
|
||||
source: "ingested" | "extracted-regex" | "extracted-llm";
|
||||
}
|
||||
|
||||
/**
|
||||
* Data structure for entities.json
|
||||
*/
|
||||
export interface EntitiesData {
|
||||
updated: string;
|
||||
entities: Entity[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Data structure for facts.json
|
||||
*/
|
||||
export interface FactsData {
|
||||
updated: string;
|
||||
facts: Fact[];
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Hook Integration Points
|
||||
|
||||
The plugin will register handlers for the following OpenClaw core events:
|
||||
|
||||
| Hook Event | Priority | Handler Logic |
|
||||
| ------------------ | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `message_received` | 100 | - Triggers the real-time entity extraction pipeline. <br> - Extracts content and sender. <br> - Adds the message to the `LlmEnhancer` batch if LLM is enabled. |
|
||||
| `message_sent` | 100 | - Same as `message_received`. Ensures the agent's own messages are processed for knowledge. |
|
||||
| `session_start` | 200 | - Initializes the `Maintenance` service. <br> - Starts the internal timers for fact decay and embeddings sync. <br> - Ensures workspace directories exist. |
|
||||
|
||||
---
|
||||
|
||||
## 5. Entity Extraction Pipeline
|
||||
|
||||
The extraction process runs on every message and is designed to be fast and efficient.
|
||||
|
||||
### 5.1. Regex Extraction
|
||||
|
||||
- **Always On (if enabled):** Runs first on every message.
|
||||
- **Patterns:** A configurable set of regular expressions will be defined in `src/patterns.ts`. These will cover common entities like dates (`YYYY-MM-DD`), email addresses, URLs, and potentially user-defined patterns.
|
||||
- **Performance:** This step is extremely fast and has negligible overhead.
|
||||
- **Output:** Produces a preliminary list of potential entities.
|
||||
|
||||
### 5.2. LLM Enhancement (Batched)
|
||||
|
||||
- **Optional:** Enabled via configuration.
|
||||
- **Batching:** The `LlmEnhancer` class collects messages up to `batchSize` or until `cooldownMs` has passed since the last message. This avoids overwhelming the LLM with single requests.
|
||||
- **Process:**
|
||||
1. A batch of messages is formatted into a single prompt.
|
||||
2. The prompt instructs the LLM to identify entities (person, location, etc.) and structured facts (triples like `Subject-Predicate-Object`).
|
||||
3. The request is sent to the configured LLM endpoint (`extraction.llm.endpoint`).
|
||||
4. The LLM's JSON response is parsed.
|
||||
- **Merging:** LLM-extracted entities are merged with the regex-based results. The `source` array on the `Entity` object is updated to reflect that it was identified by both methods. LLM results are generally given a higher initial `importance` score.
|
||||
|
||||
---
|
||||
|
||||
## 6. Fact Store Design
|
||||
|
||||
The `FactStore` class manages the `facts.json` file, providing an in-memory cache and methods for interacting with facts.
|
||||
|
||||
### 6.1. Data Structure (`facts.json`)
|
||||
|
||||
The file will contain a `FactsData` object:
|
||||
|
||||
```json
|
||||
{
|
||||
"updated": "2026-02-17T15:30:00Z",
|
||||
"facts": [
|
||||
{
|
||||
"id": "f0a4c1b0-9b1e-4b7b-8f3a-0e9c8d7b6a5a",
|
||||
"subject": "person:atlas",
|
||||
"predicate": "is-a",
|
||||
"object": "sub-agent",
|
||||
"relevance": 0.95,
|
||||
"createdAt": "2026-02-17T14:00:00Z",
|
||||
"lastAccessed": "2026-02-17T15:20:00Z",
|
||||
"source": "extracted-llm"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 6.2. `FactStore` Class API
|
||||
|
||||
```typescript
|
||||
// src/fact-store.ts
|
||||
class FactStore {
|
||||
constructor(workspace: string, config: KnowledgeConfig['storage'], logger: Logger);
|
||||
|
||||
// Load facts from facts.json into memory
|
||||
load(): Promise<void>;
|
||||
|
||||
// Add a new fact or update an existing one
|
||||
addFact(fact: Omit<Fact, 'id' | 'createdAt' | 'lastAccessed'>): Fact;
|
||||
|
||||
// Retrieve a fact by its ID
|
||||
getFact(id: string): Fact | undefined;
|
||||
|
||||
// Query facts based on subject, predicate, or object
|
||||
query(query: { subject?: string; predicate?: string; object?: string }): Fact[];
|
||||
|
||||
// Run the decay algorithm on all facts
|
||||
decayFacts(rate: number): { decayedCount: number };
|
||||
|
||||
// Persist the in-memory store to disk (debounced)
|
||||
commit(): Promise<void>;
|
||||
}
|
||||
```
|
||||
|
||||
### 6.3. Storage and Persistence
|
||||
|
||||
- **Debounced Writes:** All modifications to the fact store will trigger a debounced `commit()` call. This ensures that rapid, successive writes (e.g., during a fast-paced conversation) are batched into a single file I/O operation, configured by `storage.writeDebounceMs`.
|
||||
- **Atomic Writes:** The `storage.ts` module will use a "write to temp file then rename" strategy to prevent data corruption if the application terminates mid-write.
|
||||
|
||||
---
|
||||
|
||||
## 7. Decay Algorithm
|
||||
|
||||
The decay algorithm prevents the fact store from becoming cluttered with stale, irrelevant information. It is managed by the `Maintenance` service.
|
||||
|
||||
- **Trigger:** Runs on a schedule defined by `decay.intervalHours`.
|
||||
- **Logic:** For each fact, the relevance score is reduced by the `decay.rate`.
|
||||
```
|
||||
newRelevance = currentRelevance * (1 - decayRate)
|
||||
```
|
||||
- **Floor:** Relevance will not decay below a certain floor (e.g., 0.1) to keep it in the system.
|
||||
- **Promotion:** When a fact is "accessed" (e.g., used to answer a question or mentioned again), its `relevance` score is boosted, and its `lastAccessed` timestamp is updated. A simple boost could be `newRelevance = currentRelevance + (1 - currentRelevance) * 0.5`, pushing it halfway to 1.0.
|
||||
- **Pruning:** Facts with a relevance score below a configurable threshold (e.g., 0.05) after decay might be pruned from the store entirely if `storage.maxFacts` is exceeded.
|
||||
|
||||
---
|
||||
|
||||
## 8. Embeddings Integration
|
||||
|
||||
This feature allows for semantic querying of facts and is entirely optional.
|
||||
|
||||
### 8.1. `Embeddings` Service
|
||||
|
||||
- **Trigger:** Runs on a schedule defined by `embeddings.syncIntervalMinutes`.
|
||||
- **Process:**
|
||||
1. The service scans `facts.json` for any facts that have not yet been embedded.
|
||||
2. It formats each fact into a natural language string, e.g., "Atlas is a sub-agent."
|
||||
3. It sends a batch of these strings to a ChromaDB-compatible vector database via its HTTP API.
|
||||
4. The fact's ID is stored as metadata alongside the vector in ChromaDB.
|
||||
- **Configuration:** The `embeddings.endpoint` must be a valid URL to the ChromaDB `/api/v1/collections/{name}/add` endpoint.
|
||||
- **Decoupling:** The plugin does **not** query ChromaDB. Its only responsibility is to push embeddings. Other plugins or services would be responsible for leveraging the vector store for retrieval-augmented generation (RAG).
|
||||
|
||||
---
|
||||
|
||||
## 9. Config Schema
|
||||
|
||||
The full `openclaw.plugin.json` schema for this plugin.
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "@vainplex/openclaw-knowledge-engine",
|
||||
"config": {
|
||||
"enabled": true,
|
||||
"workspace": "~/.clawd/plugins/knowledge-engine",
|
||||
"extraction": {
|
||||
"regex": {
|
||||
"enabled": true
|
||||
},
|
||||
"llm": {
|
||||
"enabled": true,
|
||||
"model": "mistral:7b",
|
||||
"endpoint": "http://localhost:11434/api/generate",
|
||||
"batchSize": 10,
|
||||
"cooldownMs": 30000
|
||||
}
|
||||
},
|
||||
"decay": {
|
||||
"enabled": true,
|
||||
"intervalHours": 24,
|
||||
"rate": 0.02
|
||||
},
|
||||
"embeddings": {
|
||||
"enabled": false,
|
||||
"endpoint": "http://localhost:8000/api/v1/collections/facts/add",
|
||||
"collectionName": "openclaw-facts",
|
||||
"syncIntervalMinutes": 15
|
||||
},
|
||||
"storage": {
|
||||
"maxEntities": 5000,
|
||||
"maxFacts": 10000,
|
||||
"writeDebounceMs": 15000
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Test Strategy
|
||||
|
||||
Testing will be comprehensive and follow the patterns of `@vainplex/openclaw-cortex`, using Node.js's built-in test runner.
|
||||
|
||||
- **Unit Tests:** Each class (`EntityExtractor`, `FactStore`, `LlmEnhancer`, etc.) will have its own test file (e.g., `fact-store.test.ts`). Tests will use mock objects for dependencies like the logger and file system.
|
||||
- **Integration Tests:** `hooks.test.ts` will test the end-to-end flow by simulating OpenClaw hook events and asserting that the correct file system changes occur.
|
||||
- **Configuration Tests:** `config.test.ts` will verify that default values are applied correctly and that invalid configurations are handled gracefully.
|
||||
- **CI/CD:** Tests will be run automatically in a CI pipeline on every commit.
|
||||
|
||||
---
|
||||
|
||||
## 11. Migration Guide
|
||||
|
||||
This section outlines the process for decommissioning the old Python scripts and migrating to the new plugin.
|
||||
|
||||
1. **Disable Old Services:** Stop and disable the `systemd` services and timers for `entity-extractor-stream.py`, `smart-extractor.py`, `knowledge-engine.py`, and `cortex-loops-stream.py`.
|
||||
```bash
|
||||
systemctl stop entity-extractor-stream.service smart-extractor.timer knowledge-engine.service cortex-loops.timer
|
||||
systemctl disable entity-extractor-stream.service smart-extractor.timer knowledge-engine.service cortex-loops.timer
|
||||
```
|
||||
|
||||
2. **Install the Plugin:** Install the `@vainplex/openclaw-knowledge-engine` plugin into OpenClaw according to standard procedures.
|
||||
|
||||
3. **Configure the Plugin:** Create a configuration file at `~/.clawd/plugins/openclaw-knowledge-engine.json` (or the equivalent path) using the schema from section 9. Ensure the `workspace` directory is set to the desired location.
|
||||
|
||||
4. **Data Migration (Optional):**
|
||||
- **Entities:** A one-time script (`./scripts/migrate-entities.js`) will be provided to convert the old `~/.cortex/knowledge/entities.json` format to the new `Entity` format defined in `src/types.ts`.
|
||||
- **Facts:** As the old `knowledge-engine.py` had a different structure and no durable fact store equivalent to `facts.json`, facts will not be migrated. The system will start with a fresh fact store.
|
||||
- **TypeDB:** No migration from TypeDB will be provided.
|
||||
|
||||
5. **Enable and Restart:** Enable the plugin in OpenClaw's main configuration and restart the OpenClaw instance. Monitor the logs for successful initialization.
|
||||
|
||||
---
|
||||
|
||||
## 12. Performance Requirements
|
||||
|
||||
- **Message Hook Overhead:** The synchronous part of the message hook (regex extraction) must complete in under **5ms** on average to avoid delaying the message processing pipeline.
|
||||
- **LLM Latency:** LLM processing is asynchronous and batched, so it does not block the main thread. However, the total time to analyze a batch should be logged and monitored.
|
||||
- **Memory Usage:** The plugin's heap size should not exceed **100MB** under normal load, assuming the configured `maxEntities` and `maxFacts` limits.
|
||||
- **CPU Usage:** Background maintenance tasks (decay, embeddings sync) should be staggered and have low CPU impact, consuming less than 5% of a single core while running.
|
||||
38
index.ts
Normal file
38
index.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
// index.ts
|
||||
|
||||
import { resolveConfig } from './src/config.js';
|
||||
import { HookManager } from './src/hooks.js';
|
||||
import type { OpenClawPluginApi } from './src/types.js';
|
||||
|
||||
// The main entry point for the OpenClaw plugin.
|
||||
// This function is called by the OpenClaw host during plugin loading.
|
||||
export default (api: OpenClawPluginApi, context: { workspace: string }): void => {
|
||||
const { pluginConfig, logger } = api;
|
||||
const { workspace: openClawWorkspace } = context;
|
||||
|
||||
// 1. Resolve and validate the configuration
|
||||
const config = resolveConfig(pluginConfig, logger, openClawWorkspace);
|
||||
|
||||
if (!config) {
|
||||
logger.error('Failed to initialize Knowledge Engine: Invalid configuration. The plugin will be disabled.');
|
||||
return;
|
||||
}
|
||||
|
||||
if (!config.enabled) {
|
||||
logger.info('Knowledge Engine is disabled in the configuration.');
|
||||
return;
|
||||
}
|
||||
|
||||
// 2. Initialize the Hook Manager with the resolved config
|
||||
try {
|
||||
const hookManager = new HookManager(api, config);
|
||||
|
||||
// 3. Register all the event hooks
|
||||
hookManager.registerHooks();
|
||||
|
||||
logger.info('Knowledge Engine plugin initialized successfully.');
|
||||
|
||||
} catch (err) {
|
||||
logger.error('An unexpected error occurred during Knowledge Engine initialization.', err as Error);
|
||||
}
|
||||
};
|
||||
125
openclaw.plugin.json
Normal file
125
openclaw.plugin.json
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
{
|
||||
"id": "@vainplex/openclaw-knowledge-engine",
|
||||
"config": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Whether the knowledge engine plugin is enabled."
|
||||
},
|
||||
"workspace": {
|
||||
"type": "string",
|
||||
"default": "~/.clawd/plugins/knowledge-engine",
|
||||
"description": "The directory to store knowledge files (entities.json, facts.json)."
|
||||
},
|
||||
"extraction": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"regex": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable or disable high-speed regex-based entity extraction."
|
||||
}
|
||||
}
|
||||
},
|
||||
"llm": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable or disable high-fidelity LLM-based entity and fact extraction."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"default": "mistral:7b",
|
||||
"description": "The model name to use for the LLM API call (e.g., Ollama model)."
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string",
|
||||
"default": "http://localhost:11434/api/generate",
|
||||
"description": "The HTTP endpoint for the LLM generation API."
|
||||
},
|
||||
"batchSize": {
|
||||
"type": "number",
|
||||
"default": 10,
|
||||
"description": "Number of messages to batch together before sending to the LLM."
|
||||
},
|
||||
"cooldownMs": {
|
||||
"type": "number",
|
||||
"default": 30000,
|
||||
"description": "Milliseconds to wait after the last message before sending a batch to the LLM."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"decay": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable or disable the periodic decay of fact relevance."
|
||||
},
|
||||
"intervalHours": {
|
||||
"type": "number",
|
||||
"default": 24,
|
||||
"description": "How often (in hours) to run the decay process."
|
||||
},
|
||||
"rate": {
|
||||
"type": "number",
|
||||
"default": 0.02,
|
||||
"description": "The percentage of relevance to decay in each interval (e.g., 0.02 is 2%)."
|
||||
}
|
||||
}
|
||||
},
|
||||
"embeddings": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Enable or disable syncing of facts to a vector database."
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string",
|
||||
"default": "http://localhost:8000/api/v1/collections/facts/add",
|
||||
"description": "The HTTP endpoint for the vector database's add API (ChromaDB compatible)."
|
||||
},
|
||||
"collectionName": {
|
||||
"type": "string",
|
||||
"default": "openclaw-facts",
|
||||
"description": "The name of the collection to use in the vector database."
|
||||
},
|
||||
"syncIntervalMinutes": {
|
||||
"type": "number",
|
||||
"default": 15,
|
||||
"description": "How often (in minutes) to sync new facts to the vector database."
|
||||
}
|
||||
}
|
||||
},
|
||||
"storage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"maxEntities": {
|
||||
"type": "number",
|
||||
"default": 5000,
|
||||
"description": "The maximum number of entities to store before pruning."
|
||||
},
|
||||
"maxFacts": {
|
||||
"type": "number",
|
||||
"default": 10000,
|
||||
"description": "The maximum number of facts to store before pruning."
|
||||
},
|
||||
"writeDebounceMs": {
|
||||
"type": "number",
|
||||
"default": 15000,
|
||||
"description": "Milliseconds to wait after a change before writing data to disk."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1686
package-lock.json
generated
Normal file
1686
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
36
package.json
Normal file
36
package.json
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"name": "@vainplex/openclaw-knowledge-engine",
|
||||
"version": "0.1.0",
|
||||
"description": "An OpenClaw plugin for real-time and batch knowledge extraction from conversational data.",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"test": "tsx --test test/*.test.ts",
|
||||
"lint": "eslint . --ext .ts",
|
||||
"prepublishOnly": "npm run build"
|
||||
},
|
||||
"keywords": [
|
||||
"openclaw",
|
||||
"plugin",
|
||||
"knowledge-extraction",
|
||||
"nlp",
|
||||
"entity-extraction"
|
||||
],
|
||||
"author": "Vainplex",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/your-repo/openclaw-knowledge-engine.git"
|
||||
},
|
||||
"openclaw": {
|
||||
"id": "@vainplex/openclaw-knowledge-engine"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.11.24",
|
||||
"eslint": "^8.57.0",
|
||||
"typescript": "^5.3.3",
|
||||
"tsx": "^4.7.1"
|
||||
},
|
||||
"type": "module"
|
||||
}
|
||||
180
src/config.ts
Normal file
180
src/config.ts
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
// src/config.ts
|
||||
|
||||
import * as path from 'node:path';
|
||||
import { KnowledgeConfig, Logger } from './types.js';
|
||||
|
||||
/**
 * The default configuration values for the plugin.
 * These are merged with the user-provided configuration.
 * `workspace` is omitted here because it is resolved at runtime from either
 * the user config or the host workspace directory.
 */
export const DEFAULT_CONFIG: Omit<KnowledgeConfig, 'workspace'> = {
  enabled: true,
  extraction: {
    // Fast regex pass runs on every message.
    regex: { enabled: true },
    // Optional LLM pass; messages are batched to limit request volume.
    llm: {
      enabled: true,
      model: 'mistral:7b',
      endpoint: 'http://localhost:11434/api/generate',
      batchSize: 10,
      cooldownMs: 30000, // quiet period before flushing a partial batch
    },
  },
  decay: {
    enabled: true,
    intervalHours: 24,
    rate: 0.02, // 2% relevance decay per interval
  },
  embeddings: {
    enabled: false, // opt-in: requires a running ChromaDB-compatible service
    endpoint: 'http://localhost:8000/api/v1/collections/facts/add',
    collectionName: 'openclaw-facts',
    syncIntervalMinutes: 15,
  },
  storage: {
    maxEntities: 5000,
    maxFacts: 10000,
    writeDebounceMs: 15000, // batch rapid changes into a single disk write
  },
};
|
||||
|
||||
/** Type-safe deep merge: spread source into target for Record values. */
|
||||
function deepMerge<T extends Record<string, unknown>>(
|
||||
target: T,
|
||||
source: Record<string, unknown>
|
||||
): T {
|
||||
const result = { ...target } as Record<string, unknown>;
|
||||
for (const key of Object.keys(source)) {
|
||||
const srcVal = source[key];
|
||||
const tgtVal = result[key];
|
||||
if (isPlainObject(srcVal) && isPlainObject(tgtVal)) {
|
||||
result[key] = deepMerge(
|
||||
tgtVal as Record<string, unknown>,
|
||||
srcVal as Record<string, unknown>
|
||||
);
|
||||
} else if (srcVal !== undefined) {
|
||||
result[key] = srcVal;
|
||||
}
|
||||
}
|
||||
return result as T;
|
||||
}
|
||||
|
||||
function isPlainObject(val: unknown): val is Record<string, unknown> {
|
||||
return typeof val === 'object' && val !== null && !Array.isArray(val);
|
||||
}
|
||||
|
||||
/** Merge user config over defaults and resolve workspace. */
|
||||
function mergeConfigDefaults(
|
||||
userConfig: Record<string, unknown>,
|
||||
openClawWorkspace: string
|
||||
): KnowledgeConfig {
|
||||
const merged = deepMerge(
|
||||
DEFAULT_CONFIG as unknown as Record<string, unknown>,
|
||||
userConfig
|
||||
);
|
||||
const ws = typeof userConfig.workspace === 'string' && userConfig.workspace
|
||||
? userConfig.workspace
|
||||
: path.join(openClawWorkspace, 'knowledge-engine');
|
||||
return { ...merged, workspace: ws } as KnowledgeConfig;
|
||||
}
|
||||
|
||||
/** Replace a leading tilde with the user's home directory. */
|
||||
function resolveTilde(ws: string, logger: Logger, fallback: string): string {
|
||||
if (!ws.startsWith('~')) return ws;
|
||||
const homeDir = process.env.HOME || process.env.USERPROFILE;
|
||||
if (homeDir) return path.join(homeDir, ws.slice(1));
|
||||
logger.warn('Could not resolve home directory for workspace path.');
|
||||
return fallback;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves and validates the plugin's configuration.
|
||||
*
|
||||
* @param userConfig The user-provided configuration from OpenClaw's pluginConfig.
|
||||
* @param logger A logger instance for logging warnings or errors.
|
||||
* @param openClawWorkspace The root workspace directory provided by OpenClaw.
|
||||
* @returns A fully resolved KnowledgeConfig, or null if validation fails.
|
||||
*/
|
||||
export function resolveConfig(
|
||||
userConfig: Record<string, unknown>,
|
||||
logger: Logger,
|
||||
openClawWorkspace: string
|
||||
): KnowledgeConfig | null {
|
||||
const config = mergeConfigDefaults(userConfig, openClawWorkspace);
|
||||
const fallbackWs = path.join(openClawWorkspace, 'knowledge-engine');
|
||||
config.workspace = resolveTilde(config.workspace, logger, fallbackWs);
|
||||
|
||||
const errors = validateConfig(config);
|
||||
if (errors.length > 0) {
|
||||
errors.forEach(e => logger.error(`Invalid configuration: ${e}`));
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.info('Knowledge Engine configuration resolved successfully.');
|
||||
return config;
|
||||
}
|
||||
|
||||
// ── Validation ──────────────────────────────────────────────
|
||||
|
||||
function validateConfig(config: KnowledgeConfig): string[] {
|
||||
return [
|
||||
...validateRoot(config),
|
||||
...validateExtraction(config.extraction),
|
||||
...validateDecay(config.decay),
|
||||
...validateEmbeddings(config.embeddings),
|
||||
...validateStorage(config.storage),
|
||||
];
|
||||
}
|
||||
|
||||
function validateRoot(c: KnowledgeConfig): string[] {
|
||||
const errs: string[] = [];
|
||||
if (typeof c.enabled !== 'boolean') errs.push('"enabled" must be a boolean.');
|
||||
if (typeof c.workspace !== 'string' || c.workspace.trim() === '') {
|
||||
errs.push('"workspace" must be a non-empty string.');
|
||||
}
|
||||
return errs;
|
||||
}
|
||||
|
||||
function validateExtraction(ext: KnowledgeConfig['extraction']): string[] {
|
||||
const errs: string[] = [];
|
||||
if (ext.llm.enabled) {
|
||||
if (!isValidHttpUrl(ext.llm.endpoint)) {
|
||||
errs.push('"extraction.llm.endpoint" must be a valid HTTP/S URL.');
|
||||
}
|
||||
if ((ext.llm.batchSize ?? 0) < 1) {
|
||||
errs.push('"extraction.llm.batchSize" must be at least 1.');
|
||||
}
|
||||
}
|
||||
return errs;
|
||||
}
|
||||
|
||||
function validateDecay(d: KnowledgeConfig['decay']): string[] {
|
||||
const errs: string[] = [];
|
||||
if (d.rate < 0 || d.rate > 1) errs.push('"decay.rate" must be between 0 and 1.');
|
||||
if ((d.intervalHours ?? 0) <= 0) errs.push('"decay.intervalHours" must be greater than 0.');
|
||||
return errs;
|
||||
}
|
||||
|
||||
function validateEmbeddings(e: KnowledgeConfig['embeddings']): string[] {
|
||||
const errs: string[] = [];
|
||||
if (e.enabled && !isValidHttpUrl(e.endpoint)) {
|
||||
errs.push('"embeddings.endpoint" must be a valid HTTP/S URL.');
|
||||
}
|
||||
return errs;
|
||||
}
|
||||
|
||||
function validateStorage(s: KnowledgeConfig['storage']): string[] {
|
||||
const errs: string[] = [];
|
||||
if ((s.writeDebounceMs ?? 0) < 0) {
|
||||
errs.push('"storage.writeDebounceMs" must be a non-negative number.');
|
||||
}
|
||||
return errs;
|
||||
}
|
||||
|
||||
function isValidHttpUrl(str: string): boolean {
|
||||
try {
|
||||
const url = new URL(str);
|
||||
return url.protocol === 'http:' || url.protocol === 'https:';
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
82
src/embeddings.ts
Normal file
82
src/embeddings.ts
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
// src/embeddings.ts
|
||||
|
||||
import { Fact, KnowledgeConfig, Logger } from './types.js';
|
||||
import { httpPost } from './http-client.js';
|
||||
|
||||
/**
 * ChromaDB v2 API payload format: parallel arrays where index i of
 * `ids`, `documents` and `metadatas` describes the same record.
 * Metadata values are strings (populated by constructChromaPayload).
 */
interface ChromaPayload {
  ids: string[];
  documents: string[];
  metadatas: Record<string, string>[];
}
|
||||
|
||||
/**
|
||||
* Manages optional integration with a ChromaDB-compatible vector database.
|
||||
*/
|
||||
export class Embeddings {
|
||||
private readonly config: KnowledgeConfig['embeddings'];
|
||||
private readonly logger: Logger;
|
||||
|
||||
constructor(config: KnowledgeConfig['embeddings'], logger: Logger) {
|
||||
this.config = config;
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
/** Checks if the embeddings service is enabled. */
|
||||
public isEnabled(): boolean {
|
||||
return this.config.enabled;
|
||||
}
|
||||
|
||||
/**
|
||||
* Syncs a batch of facts to the vector database.
|
||||
* @returns The number of successfully synced facts.
|
||||
*/
|
||||
public async sync(facts: Fact[]): Promise<number> {
|
||||
if (!this.isEnabled() || facts.length === 0) return 0;
|
||||
|
||||
this.logger.info(`Starting embedding sync for ${facts.length} facts.`);
|
||||
const payload = this.constructChromaPayload(facts);
|
||||
const url = this.buildEndpointUrl();
|
||||
|
||||
try {
|
||||
await httpPost(url, payload);
|
||||
this.logger.info(`Successfully synced ${facts.length} facts to ChromaDB.`);
|
||||
return facts.length;
|
||||
} catch (err) {
|
||||
this.logger.error('Failed to sync embeddings to ChromaDB.', err as Error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** Builds the full endpoint URL with collection name substituted. */
|
||||
private buildEndpointUrl(): string {
|
||||
return this.config.endpoint
|
||||
.replace('{name}', this.config.collectionName)
|
||||
.replace('//', '//') // preserve protocol double-slash
|
||||
.replace(/([^:])\/\//g, '$1/'); // collapse any other double-slashes
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs the payload for ChromaDB v2 API.
|
||||
* Metadata values are all strings (v2 requirement).
|
||||
*/
|
||||
private constructChromaPayload(facts: Fact[]): ChromaPayload {
|
||||
const payload: ChromaPayload = { ids: [], documents: [], metadatas: [] };
|
||||
|
||||
for (const fact of facts) {
|
||||
payload.ids.push(fact.id);
|
||||
payload.documents.push(
|
||||
`${fact.subject} ${fact.predicate.replace(/-/g, ' ')} ${fact.object}.`
|
||||
);
|
||||
payload.metadatas.push({
|
||||
subject: fact.subject,
|
||||
predicate: fact.predicate,
|
||||
object: fact.object,
|
||||
source: fact.source,
|
||||
createdAt: fact.createdAt,
|
||||
});
|
||||
}
|
||||
|
||||
return payload;
|
||||
}
|
||||
}
|
||||
137
src/entity-extractor.ts
Normal file
137
src/entity-extractor.ts
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
// src/entity-extractor.ts
|
||||
|
||||
import { Entity, Logger } from './types.js';
|
||||
import { REGEX_PATTERNS } from './patterns.js';
|
||||
|
||||
// Maps REGEX_PATTERNS keys to the entity type recorded for their matches.
// Keys absent from this map fall back to 'unknown' (see extract()).
const PATTERN_TYPE_MAP: Record<string, Entity['type']> = {
  email: 'email',
  url: 'url',
  iso_date: 'date',
  common_date: 'date',
  german_date: 'date',
  english_date: 'date',
  proper_noun: 'unknown',
  product_name: 'product',
  organization_suffix: 'organization',
};
|
||||
|
||||
/**
|
||||
* Extracts entities from text using predefined regular expressions.
|
||||
*/
|
||||
export class EntityExtractor {
|
||||
private readonly logger: Logger;
|
||||
|
||||
constructor(logger: Logger) {
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts entities from a given text based on the regex patterns.
|
||||
* @param text The input text to process.
|
||||
* @returns An array of found entities.
|
||||
*/
|
||||
public extract(text: string): Entity[] {
|
||||
const foundEntities: Map<string, Entity> = new Map();
|
||||
|
||||
for (const key in REGEX_PATTERNS) {
|
||||
// Each access returns a fresh RegExp (via Proxy), avoiding /g state-bleed.
|
||||
const regex = REGEX_PATTERNS[key];
|
||||
if (!regex.global) {
|
||||
this.logger.warn(`Regex for "${key}" is not global. Skipping.`);
|
||||
continue;
|
||||
}
|
||||
const entityType = PATTERN_TYPE_MAP[key] || 'unknown';
|
||||
let match;
|
||||
while ((match = regex.exec(text)) !== null) {
|
||||
const value = match[0].trim();
|
||||
if (!value) continue;
|
||||
this.processMatch(key, value, entityType, foundEntities);
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(foundEntities.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a single regex match and upserts it into the entity map.
|
||||
*/
|
||||
private processMatch(
|
||||
_key: string,
|
||||
value: string,
|
||||
entityType: Entity['type'],
|
||||
entities: Map<string, Entity>
|
||||
): void {
|
||||
const canonicalValue = this.canonicalize(value, entityType);
|
||||
const id = `${entityType}:${canonicalValue.toLowerCase().replace(/\s+/g, '-')}`;
|
||||
|
||||
if (entities.has(id)) {
|
||||
const existing = entities.get(id)!;
|
||||
if (!existing.mentions.includes(value)) existing.mentions.push(value);
|
||||
existing.count++;
|
||||
if (!existing.source.includes('regex')) existing.source.push('regex');
|
||||
} else {
|
||||
entities.set(id, {
|
||||
id,
|
||||
type: entityType,
|
||||
value: canonicalValue,
|
||||
mentions: [value],
|
||||
count: 1,
|
||||
importance: this.calculateInitialImportance(entityType, value),
|
||||
lastSeen: new Date().toISOString(),
|
||||
source: ['regex'],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans and standardizes an entity value based on its type.
|
||||
*/
|
||||
private canonicalize(value: string, type: Entity['type']): string {
|
||||
if (type === 'organization') {
|
||||
const suffixes = /,?\s?(?:Inc\.|LLC|Corp\.|GmbH|AG|Ltd\.)$/i;
|
||||
return value.replace(suffixes, '').trim();
|
||||
}
|
||||
return value.replace(/[.,!?;:]$/, '').trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates an initial importance score for an entity.
|
||||
*/
|
||||
private calculateInitialImportance(type: Entity['type'], value: string): number {
|
||||
switch (type) {
|
||||
case 'organization': return 0.8;
|
||||
case 'person': return 0.7;
|
||||
case 'product': return 0.6;
|
||||
case 'location': return 0.5;
|
||||
case 'date':
|
||||
case 'email':
|
||||
case 'url': return 0.4;
|
||||
default: return value.split(/\s|-/).length > 1 ? 0.5 : 0.3;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges two lists of entities by ID.
|
||||
*/
|
||||
public static mergeEntities(listA: Entity[], listB: Entity[]): Entity[] {
|
||||
const merged: Map<string, Entity> = new Map();
|
||||
for (const e of listA) merged.set(e.id, { ...e });
|
||||
|
||||
for (const entity of listB) {
|
||||
if (merged.has(entity.id)) {
|
||||
const ex = merged.get(entity.id)!;
|
||||
ex.count += entity.count;
|
||||
ex.mentions = [...new Set([...ex.mentions, ...entity.mentions])];
|
||||
ex.source = [...new Set([...ex.source, ...entity.source])];
|
||||
ex.lastSeen = new Date() > new Date(ex.lastSeen)
|
||||
? new Date().toISOString() : ex.lastSeen;
|
||||
ex.importance = Math.max(ex.importance, entity.importance);
|
||||
} else {
|
||||
merged.set(entity.id, { ...entity });
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(merged.values());
|
||||
}
|
||||
}
|
||||
250
src/fact-store.ts
Normal file
250
src/fact-store.ts
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
// src/fact-store.ts
|
||||
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { AtomicStorage } from './storage.js';
|
||||
import type { Fact, FactsData, KnowledgeConfig, Logger } from './types.js';
|
||||
|
||||
/**
|
||||
* Manages an in-memory and on-disk store of structured facts.
|
||||
* Provides methods for loading, querying, modifying, and persisting facts.
|
||||
*/
|
||||
export class FactStore {
|
||||
private readonly storage: AtomicStorage;
|
||||
private readonly config: KnowledgeConfig['storage'];
|
||||
private readonly logger: Logger;
|
||||
private facts: Map<string, Fact> = new Map();
|
||||
private isLoaded: boolean = false;
|
||||
|
||||
public readonly commit: () => Promise<void>;
|
||||
|
||||
constructor(
|
||||
workspace: string,
|
||||
config: KnowledgeConfig['storage'],
|
||||
logger: Logger
|
||||
) {
|
||||
this.storage = new AtomicStorage(workspace, logger);
|
||||
this.config = config;
|
||||
this.logger = logger;
|
||||
|
||||
// Create a debounced version of the persist method.
|
||||
this.commit = AtomicStorage.debounce(
|
||||
this.persist.bind(this),
|
||||
this.config.writeDebounceMs
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads facts from the `facts.json` file into the in-memory store.
|
||||
* If the file doesn't exist, it initializes an empty store.
|
||||
*/
|
||||
public async load(): Promise<void> {
|
||||
if (this.isLoaded) {
|
||||
this.logger.debug('Fact store is already loaded.');
|
||||
return;
|
||||
}
|
||||
|
||||
await this.storage.init();
|
||||
const data = await this.storage.readJson<FactsData>('facts.json');
|
||||
if (data && Array.isArray(data.facts)) {
|
||||
this.facts = new Map(data.facts.map(fact => [fact.id, fact]));
|
||||
this.logger.info(`Loaded ${this.facts.size} facts from storage.`);
|
||||
} else {
|
||||
this.logger.info('No existing fact store found. Initializing a new one.');
|
||||
this.facts = new Map();
|
||||
}
|
||||
this.isLoaded = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new fact to the store or updates an existing one based on content.
|
||||
* @param newFactData The data for the new fact, excluding metadata fields.
|
||||
* @returns The newly created or found Fact object.
|
||||
*/
|
||||
public addFact(
|
||||
newFactData: Omit<Fact, 'id' | 'createdAt' | 'lastAccessed' | 'relevance'>
|
||||
): Fact {
|
||||
if (!this.isLoaded) {
|
||||
throw new Error('FactStore has not been loaded yet. Call load() first.');
|
||||
}
|
||||
const now = new Date().toISOString();
|
||||
|
||||
// Check if a similar fact already exists to avoid duplicates
|
||||
for (const existingFact of this.facts.values()) {
|
||||
if (
|
||||
existingFact.subject === newFactData.subject &&
|
||||
existingFact.predicate === newFactData.predicate &&
|
||||
existingFact.object === newFactData.object
|
||||
) {
|
||||
// Fact already exists, let's just boost its relevance and update timestamp
|
||||
existingFact.relevance = this.boostRelevance(existingFact.relevance);
|
||||
existingFact.lastAccessed = now;
|
||||
this.commit();
|
||||
return existingFact;
|
||||
}
|
||||
}
|
||||
|
||||
const newFact: Fact = {
|
||||
...newFactData,
|
||||
id: randomUUID(),
|
||||
createdAt: now,
|
||||
lastAccessed: now,
|
||||
relevance: 1.0, // New facts start with maximum relevance
|
||||
};
|
||||
|
||||
this.facts.set(newFact.id, newFact);
|
||||
this.prune(); // Check if we need to prune old facts
|
||||
this.commit();
|
||||
return newFact;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a fact by its unique ID.
|
||||
* @param id The UUID of the fact.
|
||||
* @returns The Fact object, or undefined if not found.
|
||||
*/
|
||||
public getFact(id: string): Fact | undefined {
|
||||
const fact = this.facts.get(id);
|
||||
if (fact) {
|
||||
fact.lastAccessed = new Date().toISOString();
|
||||
fact.relevance = this.boostRelevance(fact.relevance);
|
||||
this.commit();
|
||||
}
|
||||
return fact;
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the fact store based on subject, predicate, or object.
|
||||
* @param query An object with optional subject, predicate, and/or object to match.
|
||||
* @returns An array of matching facts, sorted by relevance.
|
||||
*/
|
||||
public query(query: { subject?: string; predicate?: string; object?: string }): Fact[] {
|
||||
const results: Fact[] = [];
|
||||
for (const fact of this.facts.values()) {
|
||||
const subjectMatch = !query.subject || fact.subject === query.subject;
|
||||
const predicateMatch = !query.predicate || fact.predicate === query.predicate;
|
||||
const objectMatch = !query.object || fact.object === query.object;
|
||||
|
||||
if (subjectMatch && predicateMatch && objectMatch) {
|
||||
results.push(fact);
|
||||
}
|
||||
}
|
||||
// Sort by relevance, descending
|
||||
return results.sort((a, b) => b.relevance - a.relevance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies a decay factor to the relevance score of all facts.
|
||||
* @param rate The decay rate (e.g., 0.05 for 5%).
|
||||
* @returns An object with the count of decayed facts.
|
||||
*/
|
||||
public decayFacts(rate: number): { decayedCount: number } {
|
||||
let decayedCount = 0;
|
||||
const minRelevance = 0.1; // Floor to prevent facts from disappearing completely
|
||||
|
||||
for (const fact of this.facts.values()) {
|
||||
const newRelevance = fact.relevance * (1 - rate);
|
||||
if (newRelevance !== fact.relevance) {
|
||||
fact.relevance = Math.max(minRelevance, newRelevance);
|
||||
decayedCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (decayedCount > 0) {
|
||||
this.logger.info(`Applied decay rate of ${rate * 100}% to ${decayedCount} facts.`);
|
||||
this.commit();
|
||||
}
|
||||
return { decayedCount };
|
||||
}
|
||||
|
||||
/**
|
||||
* Persists the current in-memory fact store to `facts.json`.
|
||||
*/
|
||||
private async persist(): Promise<void> {
|
||||
if (!this.isLoaded) {
|
||||
this.logger.warn('Attempted to persist fact store before it was loaded. Aborting.');
|
||||
return;
|
||||
}
|
||||
|
||||
const data: FactsData = {
|
||||
updated: new Date().toISOString(),
|
||||
facts: Array.from(this.facts.values()),
|
||||
};
|
||||
|
||||
try {
|
||||
await this.storage.writeJson('facts.json', data);
|
||||
this.logger.debug(`Successfully persisted ${data.facts.length} facts.`);
|
||||
} catch (err) {
|
||||
this.logger.error('Failed to persist fact store.', err as Error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the least relevant facts if the store exceeds its configured max size.
|
||||
*/
|
||||
private prune(): void {
|
||||
const factCount = this.facts.size;
|
||||
if (factCount <= this.config.maxFacts) {
|
||||
return;
|
||||
}
|
||||
|
||||
const factsToPrune = factCount - this.config.maxFacts;
|
||||
if (factsToPrune <= 0) return;
|
||||
|
||||
// Get all facts, sort by relevance (ascending) and then by lastAccessed (ascending)
|
||||
const sortedFacts = Array.from(this.facts.values()).sort((a, b) => {
|
||||
if (a.relevance !== b.relevance) {
|
||||
return a.relevance - b.relevance;
|
||||
}
|
||||
return new Date(a.lastAccessed).getTime() - new Date(b.lastAccessed).getTime();
|
||||
});
|
||||
|
||||
for (let i = 0; i < factsToPrune; i++) {
|
||||
this.facts.delete(sortedFacts[i].id);
|
||||
}
|
||||
|
||||
this.logger.info(`Pruned ${factsToPrune} least relevant facts to maintain store size.`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Boosts the relevance of a fact upon access.
|
||||
* @param currentRelevance The current relevance score.
|
||||
* @returns The new, boosted relevance score.
|
||||
*/
|
||||
private boostRelevance(currentRelevance: number): number {
|
||||
// Push the relevance 50% closer to 1.0
|
||||
const boost = (1.0 - currentRelevance) * 0.5;
|
||||
return Math.min(1.0, currentRelevance + boost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of all facts that have not been embedded yet.
|
||||
*/
|
||||
public getUnembeddedFacts(): Fact[] {
|
||||
const results: Fact[] = [];
|
||||
for (const fact of this.facts.values()) {
|
||||
if (!fact.embedded) {
|
||||
results.push(fact);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks a list of facts as having been embedded.
|
||||
* @param factIds An array of fact IDs to update.
|
||||
*/
|
||||
public markFactsAsEmbedded(factIds: string[]): void {
|
||||
const now = new Date().toISOString();
|
||||
let updatedCount = 0;
|
||||
for (const id of factIds) {
|
||||
const fact = this.facts.get(id);
|
||||
if (fact) {
|
||||
fact.embedded = now;
|
||||
updatedCount++;
|
||||
}
|
||||
}
|
||||
if (updatedCount > 0) {
|
||||
this.commit();
|
||||
}
|
||||
}
|
||||
}
|
||||
124
src/hooks.ts
Normal file
124
src/hooks.ts
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
// src/hooks.ts
|
||||
|
||||
import {
|
||||
OpenClawPluginApi,
|
||||
HookEvent,
|
||||
KnowledgeConfig,
|
||||
Logger,
|
||||
} from './types.js';
|
||||
|
||||
import { EntityExtractor } from './entity-extractor.js';
|
||||
import { FactStore } from './fact-store.js';
|
||||
import { LlmEnhancer } from './llm-enhancer.js';
|
||||
import { Maintenance } from './maintenance.js';
|
||||
import { Embeddings } from './embeddings.js';
|
||||
|
||||
/**
|
||||
* Manages the registration and orchestration of all plugin hooks.
|
||||
*/
|
||||
export class HookManager {
|
||||
private readonly api: OpenClawPluginApi;
|
||||
private readonly config: KnowledgeConfig;
|
||||
private readonly logger: Logger;
|
||||
|
||||
private entityExtractor: EntityExtractor;
|
||||
private factStore: FactStore;
|
||||
private llmEnhancer?: LlmEnhancer;
|
||||
private maintenance?: Maintenance;
|
||||
|
||||
constructor(api: OpenClawPluginApi, config: KnowledgeConfig) {
|
||||
this.api = api;
|
||||
this.config = config;
|
||||
this.logger = api.logger;
|
||||
|
||||
this.entityExtractor = new EntityExtractor(this.logger);
|
||||
this.factStore = new FactStore(
|
||||
this.config.workspace,
|
||||
this.config.storage,
|
||||
this.logger
|
||||
);
|
||||
|
||||
if (this.config.extraction.llm.enabled) {
|
||||
this.llmEnhancer = new LlmEnhancer(this.config.extraction.llm, this.logger);
|
||||
}
|
||||
}
|
||||
|
||||
/** Registers all the necessary hooks with the OpenClaw host. */
|
||||
public registerHooks(): void {
|
||||
if (!this.config.enabled) {
|
||||
this.logger.info('Knowledge Engine is disabled. No hooks registered.');
|
||||
return;
|
||||
}
|
||||
|
||||
this.api.on('session_start', this.onSessionStart.bind(this), { priority: 200 });
|
||||
this.api.on('message_received', this.onMessage.bind(this), { priority: 100 });
|
||||
this.api.on('message_sent', this.onMessage.bind(this), { priority: 100 });
|
||||
this.api.on('gateway_stop', this.onShutdown.bind(this), { priority: 900 });
|
||||
|
||||
this.logger.info('Registered all Knowledge Engine hooks.');
|
||||
}
|
||||
|
||||
/** Handler for the `session_start` hook. */
|
||||
private async onSessionStart(): Promise<void> {
|
||||
this.logger.info('Knowledge Engine starting up...');
|
||||
await this.factStore.load();
|
||||
|
||||
const embeddings = this.config.embeddings.enabled
|
||||
? new Embeddings(this.config.embeddings, this.logger)
|
||||
: undefined;
|
||||
|
||||
this.maintenance = new Maintenance(
|
||||
this.config, this.logger, this.factStore, embeddings
|
||||
);
|
||||
this.maintenance.start();
|
||||
}
|
||||
|
||||
/** Handler for `gateway_stop` — cleans up timers and flushes state. */
|
||||
private async onShutdown(): Promise<void> {
|
||||
this.logger.info('Knowledge Engine shutting down...');
|
||||
this.maintenance?.stop();
|
||||
this.llmEnhancer?.clearTimers();
|
||||
this.logger.info('Knowledge Engine shutdown complete.');
|
||||
}
|
||||
|
||||
/** Handler for `message_received` and `message_sent` hooks. */
|
||||
private async onMessage(event: HookEvent): Promise<void> {
|
||||
const text = event.content || event.message || event.text;
|
||||
if (typeof text !== 'string' || text.trim() === '') return;
|
||||
|
||||
this.logger.debug(`Processing message: "${text.substring(0, 50)}..."`);
|
||||
|
||||
if (this.config.extraction.regex.enabled) {
|
||||
const entities = this.entityExtractor.extract(text);
|
||||
if (entities.length > 0) {
|
||||
this.logger.info(`Extracted ${entities.length} entities via regex.`);
|
||||
}
|
||||
}
|
||||
|
||||
if (this.llmEnhancer) {
|
||||
const messageId = `msg-${Date.now()}`;
|
||||
this.llmEnhancer.addToBatch({ id: messageId, text });
|
||||
this.processLlmBatchWhenReady();
|
||||
}
|
||||
}
|
||||
|
||||
/** Fire-and-forget: processes LLM batch results when available. */
|
||||
private async processLlmBatchWhenReady(): Promise<void> {
|
||||
if (!this.llmEnhancer) return;
|
||||
|
||||
const result = await this.llmEnhancer.sendBatch();
|
||||
if (!result) return;
|
||||
|
||||
if (result.facts.length > 0) {
|
||||
this.logger.info(`Adding ${result.facts.length} LLM facts.`);
|
||||
for (const f of result.facts) {
|
||||
this.factStore.addFact({
|
||||
subject: f.subject,
|
||||
predicate: f.predicate,
|
||||
object: f.object,
|
||||
source: 'extracted-llm',
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
74
src/http-client.ts
Normal file
74
src/http-client.ts
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
// src/http-client.ts
|
||||
|
||||
import * as http from 'node:http';
|
||||
import * as https from 'node:https';
|
||||
|
||||
/**
 * Options object handed to `http(s).request` for a JSON POST.
 * `port` is a string because it comes straight from `URL.port`
 * (empty when the URL uses the protocol's default port).
 */
interface HttpPostOptions {
  hostname: string;
  port: string;
  path: string;
  method: 'POST';
  headers: Record<string, string | number>;
}
|
||||
|
||||
/**
|
||||
* Selects the correct HTTP/HTTPS module based on the URL protocol.
|
||||
*/
|
||||
function selectTransport(protocol: string): typeof http | typeof https {
|
||||
return protocol === 'https:' ? https : http;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds request options from a URL and payload.
|
||||
*/
|
||||
function buildRequestOptions(url: URL, payload: string): HttpPostOptions {
|
||||
return {
|
||||
hostname: url.hostname,
|
||||
port: url.port,
|
||||
path: url.pathname + url.search,
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': Buffer.byteLength(payload),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes an HTTP or HTTPS POST request, auto-selecting the
|
||||
* transport based on the URL's protocol.
|
||||
*
|
||||
* @param url The full URL string to POST to.
|
||||
* @param body The payload object to JSON-serialize and send.
|
||||
* @returns A promise resolving with the response body string.
|
||||
*/
|
||||
export function httpPost(url: string, body: unknown): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const parsed = new URL(url);
|
||||
const payload = JSON.stringify(body);
|
||||
const options = buildRequestOptions(parsed, payload);
|
||||
const transport = selectTransport(parsed.protocol);
|
||||
|
||||
const req = transport.request(options, (res) => {
|
||||
let data = '';
|
||||
res.setEncoding('utf8');
|
||||
res.on('data', (chunk: string) => { data += chunk; });
|
||||
res.on('end', () => {
|
||||
if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) {
|
||||
resolve(data);
|
||||
} else {
|
||||
reject(new Error(
|
||||
`HTTP request failed with status ${res.statusCode}: ${data}`
|
||||
));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
req.on('error', (e) => {
|
||||
reject(new Error(`HTTP request error: ${e.message}`));
|
||||
});
|
||||
|
||||
req.write(payload);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
187
src/llm-enhancer.ts
Normal file
187
src/llm-enhancer.ts
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
// src/llm-enhancer.ts
|
||||
|
||||
import { Entity, Fact, KnowledgeConfig, Logger } from './types.js';
|
||||
import { httpPost } from './http-client.js';
|
||||
|
||||
/** One queued message awaiting LLM extraction. */
interface LlmBatchItem {
  id: string;
  text: string;
}

/** Parsed LLM response shape, before transformation into Entity/Fact. */
interface LlmExtractionResult {
  entities: Omit<Entity, 'id' | 'mentions' | 'count' | 'lastSeen' | 'source'>[];
  facts: Omit<Fact, 'id' | 'relevance' | 'createdAt' | 'lastAccessed' | 'source'>[];
}
|
||||
|
||||
/**
 * Manages batched requests to an external LLM for entity and fact extraction.
 *
 * Batching model: messages accumulate via addToBatch(); a batch is sent
 * either when it reaches `batchSize` or when the cooldown timer expires.
 * sendBatch() snapshots and clears the queue before the network call, so
 * a concurrent send cannot double-process the same items.
 */
export class LlmEnhancer {
  private readonly config: KnowledgeConfig['extraction']['llm'];
  private readonly logger: Logger;
  // Messages queued since the last send.
  private batch: LlmBatchItem[] = [];
  // Single pending cooldown timer, or null when none is scheduled.
  private cooldownTimeout: NodeJS.Timeout | null = null;

  constructor(config: KnowledgeConfig['extraction']['llm'], logger: Logger) {
    this.config = config;
    this.logger = logger;
  }

  /**
   * Adds a message to the current batch.
   * Triggers a batch send (with proper error handling) when the size is reached;
   * otherwise (re)arms the cooldown timer. No-op when LLM extraction is disabled.
   */
  public addToBatch(item: LlmBatchItem): void {
    if (!this.config.enabled) return;

    this.batch.push(item);
    this.logger.debug(`Added message ${item.id} to LLM batch. Current size: ${this.batch.length}`);

    if (this.batch.length >= this.config.batchSize) {
      this.logger.info(`LLM batch size reached (${this.config.batchSize}). Sending immediately.`);
      // S1: properly await and catch errors from sendBatch
      this.sendBatch().catch(err => {
        this.logger.error('Error sending LLM batch.', err as Error);
      });
    } else {
      this.resetCooldownTimer();
    }
  }

  /**
   * Resets the cooldown timer. When it expires the batch is sent.
   * The timer is unref()'d so a pending cooldown never keeps the
   * process alive on shutdown.
   */
  private resetCooldownTimer(): void {
    if (this.cooldownTimeout) clearTimeout(this.cooldownTimeout);
    this.cooldownTimeout = setTimeout(() => {
      if (this.batch.length > 0) {
        this.logger.info('LLM cooldown expired. Sending batch.');
        this.sendBatch().catch(err => {
          this.logger.error('Error sending LLM batch on cooldown.', err as Error);
        });
      }
    }, this.config.cooldownMs);
    this.cooldownTimeout.unref();
  }

  /**
   * Clears all pending timers. Called during shutdown.
   * Safe to call when no timer is armed.
   */
  public clearTimers(): void {
    if (this.cooldownTimeout) {
      clearTimeout(this.cooldownTimeout);
      this.cooldownTimeout = null;
    }
  }

  /**
   * Sends the current batch to the LLM for processing.
   * The queue is snapshotted and cleared BEFORE the request, so items are
   * never processed twice; network/parse failures are logged and yield null
   * (the snapshotted items are dropped, not re-queued).
   * @returns The transformed entities and facts, or null on empty batch/failure.
   */
  public async sendBatch(): Promise<{ entities: Entity[]; facts: Fact[] } | null> {
    this.clearTimers();

    if (this.batch.length === 0) return null;

    const currentBatch = [...this.batch];
    this.batch = [];

    const prompt = this.constructPrompt(currentBatch);

    try {
      const responseJson = await this.makeHttpRequest(prompt);
      const result = this.parseLlmResponse(responseJson);
      const entities = this.transformToEntities(result.entities);
      const facts = this.transformToFacts(result.facts);
      this.logger.info(`LLM extracted ${entities.length} entities and ${facts.length} facts.`);
      return { entities, facts };
    } catch (err) {
      this.logger.error('Failed to send or process LLM batch.', err as Error);
      return null;
    }
  }

  /** Constructs the prompt to be sent to the LLM. */
  private constructPrompt(batch: LlmBatchItem[]): string {
    const conversation = batch.map(item => item.text).join('\n');
    return [
      'Analyze the following conversation and extract key entities and facts.',
      'Respond with a single JSON object containing "entities" and "facts".',
      '',
      'For "entities", provide objects with "type", "value", and "importance".',
      'Valid types: "person", "location", "organization", "product", "concept".',
      '',
      'For "facts", provide triples (subject, predicate, object).',
      '',
      'Conversation:',
      '---',
      conversation,
      '---',
      '',
      'JSON Response:',
    ].join('\n');
  }

  /** Makes an HTTP(S) request to the configured LLM endpoint. */
  private makeHttpRequest(prompt: string): Promise<string> {
    return httpPost(this.config.endpoint, {
      model: this.config.model,
      prompt,
      stream: false,
      format: 'json',
    });
  }

  /**
   * Parses and validates the JSON response from the LLM.
   * Handles a double-encoded payload: when the outer object carries a string
   * `response` field (presumably an Ollama-style wrapper — confirm against
   * the deployed endpoint), that string is parsed as the inner JSON.
   * @throws Error when either JSON layer fails to parse.
   */
  private parseLlmResponse(responseJson: string): LlmExtractionResult {
    try {
      const outer = JSON.parse(responseJson) as Record<string, unknown>;
      const inner = typeof outer.response === 'string'
        ? outer.response : JSON.stringify(outer);
      const data = JSON.parse(inner) as Record<string, unknown>;
      if (!data || typeof data !== 'object') {
        throw new Error('LLM response is not a valid object.');
      }
      return {
        entities: Array.isArray(data.entities) ? data.entities : [],
        facts: Array.isArray(data.facts) ? data.facts : [],
      };
    } catch (err) {
      // NOTE(review): logs the full raw response — may be large; consider truncating.
      this.logger.error(`Failed to parse LLM response: ${responseJson}`, err as Error);
      throw new Error('Invalid JSON response from LLM.');
    }
  }

  /**
   * Transforms raw LLM entity data into the standard Entity format.
   * Entries without string `value`/`type` are skipped; importance is
   * clamped to [0, 1] and defaults to 0.7 when absent.
   */
  private transformToEntities(rawEntities: unknown[]): Entity[] {
    const entities: Entity[] = [];
    for (const raw of rawEntities) {
      const r = raw as Record<string, unknown>;
      if (typeof r.value !== 'string' || typeof r.type !== 'string') continue;
      const value = r.value.trim();
      const type = r.type.toLowerCase();
      const id = `${type}:${value.toLowerCase().replace(/\s+/g, '-')}`;
      const imp = typeof r.importance === 'number'
        ? Math.max(0, Math.min(1, r.importance)) : 0.7;
      entities.push({
        id, value, type: type as Entity['type'],
        mentions: [value], count: 1, importance: imp,
        lastSeen: new Date().toISOString(), source: ['llm'],
      });
    }
    return entities;
  }

  /**
   * Transforms raw LLM fact data into partial Fact objects.
   * Entries missing any of the triple's string fields are skipped.
   * NOTE(review): the `as Fact` cast produces objects WITHOUT id/relevance/
   * timestamps — consumers must complete them (e.g. via FactStore.addFact).
   */
  private transformToFacts(rawFacts: unknown[]): Fact[] {
    const facts: Fact[] = [];
    for (const raw of rawFacts) {
      const r = raw as Record<string, unknown>;
      if (typeof r.subject !== 'string' || typeof r.predicate !== 'string' || typeof r.object !== 'string') continue;
      facts.push({
        subject: r.subject.trim(),
        predicate: r.predicate.trim().toLowerCase().replace(/\s+/g, '-'),
        object: r.object.trim(),
        source: 'extracted-llm',
      } as Fact);
    }
    return facts;
  }
}
|
||||
102
src/maintenance.ts
Normal file
102
src/maintenance.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
// src/maintenance.ts
|
||||
|
||||
import { Embeddings } from './embeddings.js';
|
||||
import { FactStore } from './fact-store.js';
|
||||
import { KnowledgeConfig, Logger } from './types.js';
|
||||
|
||||
/**
|
||||
* Manages background maintenance tasks for the knowledge engine,
|
||||
* such as decaying fact relevance and syncing embeddings.
|
||||
*/
|
||||
export class Maintenance {
|
||||
private readonly config: KnowledgeConfig;
|
||||
private readonly logger: Logger;
|
||||
private readonly factStore: FactStore;
|
||||
private readonly embeddings?: Embeddings;
|
||||
|
||||
private decayTimer: NodeJS.Timeout | null = null;
|
||||
private embeddingsTimer: NodeJS.Timeout | null = null;
|
||||
|
||||
/**
 * @param config Full plugin configuration (decay + embeddings sections are read).
 * @param logger Host-provided logger.
 * @param factStore Store whose facts are decayed by the scheduled decay task.
 * @param embeddings Optional embeddings client; when omitted, no sync timer is scheduled.
 */
constructor(
  config: KnowledgeConfig,
  logger: Logger,
  factStore: FactStore,
  embeddings?: Embeddings
) {
  this.config = config;
  this.logger = logger;
  this.factStore = factStore;
  this.embeddings = embeddings;
}
|
||||
|
||||
/** Starts all configured maintenance timers. Idempotent: existing timers are cleared first. */
public start(): void {
  this.logger.info('Starting maintenance service...');
  this.stop(); // tear down timers from any previous start()
  this.startDecayTimer();
  this.startEmbeddingsTimer();
}
|
||||
|
||||
/** Stops all running maintenance timers. */
|
||||
public stop(): void {
|
||||
if (this.decayTimer) {
|
||||
clearInterval(this.decayTimer);
|
||||
this.decayTimer = null;
|
||||
}
|
||||
if (this.embeddingsTimer) {
|
||||
clearInterval(this.embeddingsTimer);
|
||||
this.embeddingsTimer = null;
|
||||
}
|
||||
this.logger.info('Stopped maintenance service.');
|
||||
}
|
||||
|
||||
private startDecayTimer(): void {
|
||||
if (!this.config.decay.enabled) return;
|
||||
const ms = this.config.decay.intervalHours * 60 * 60 * 1000;
|
||||
this.decayTimer = setInterval(() => this.runDecay(), ms);
|
||||
this.decayTimer.unref();
|
||||
this.logger.info(`Scheduled fact decay every ${this.config.decay.intervalHours} hours.`);
|
||||
}
|
||||
|
||||
private startEmbeddingsTimer(): void {
|
||||
if (!this.embeddings?.isEnabled()) return;
|
||||
const ms = this.config.embeddings.syncIntervalMinutes * 60 * 1000;
|
||||
this.embeddingsTimer = setInterval(() => this.runEmbeddingsSync(), ms);
|
||||
this.embeddingsTimer.unref();
|
||||
this.logger.info(`Scheduled embeddings sync every ${this.config.embeddings.syncIntervalMinutes} min.`);
|
||||
}
|
||||
|
||||
/** Executes the fact decay process. */
|
||||
public runDecay(): void {
|
||||
this.logger.info('Running scheduled fact decay...');
|
||||
try {
|
||||
const { decayedCount } = this.factStore.decayFacts(this.config.decay.rate);
|
||||
this.logger.info(`Fact decay complete. Decayed ${decayedCount} facts.`);
|
||||
} catch (err) {
|
||||
this.logger.error('Error during fact decay.', err as Error);
|
||||
}
|
||||
}
|
||||
|
||||
/** Executes the embeddings synchronization process. */
|
||||
public async runEmbeddingsSync(): Promise<void> {
|
||||
if (!this.embeddings?.isEnabled()) return;
|
||||
|
||||
this.logger.info('Running scheduled embeddings sync...');
|
||||
try {
|
||||
const unembedded = this.factStore.getUnembeddedFacts();
|
||||
if (unembedded.length === 0) {
|
||||
this.logger.info('No new facts to sync for embeddings.');
|
||||
return;
|
||||
}
|
||||
|
||||
const synced = await this.embeddings.sync(unembedded);
|
||||
if (synced > 0) {
|
||||
const ids = unembedded.slice(0, synced).map(f => f.id);
|
||||
this.factStore.markFactsAsEmbedded(ids);
|
||||
this.logger.info(`Embeddings sync complete. Synced ${synced} facts.`);
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.error('Error during embeddings sync.', err as Error);
|
||||
}
|
||||
}
|
||||
}
|
||||
90
src/patterns.ts
Normal file
90
src/patterns.ts
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
// src/patterns.ts
|
||||
|
||||
/**
|
||||
* Common words that look like proper nouns (start of sentence) but are not.
|
||||
*/
|
||||
const EXCLUDED_WORDS = [
|
||||
'A', 'An', 'The', 'Hello', 'My', 'This', 'Contact', 'He', 'She',
|
||||
'It', 'We', 'They', 'I', 'You', 'His', 'Her', 'Our', 'Your',
|
||||
'Their', 'Its', 'That', 'These', 'Those', 'What', 'Which', 'Who',
|
||||
'How', 'When', 'Where', 'Why', 'But', 'And', 'Or', 'So', 'Not',
|
||||
'No', 'Yes', 'Also', 'Just', 'For', 'From', 'With', 'About',
|
||||
'After', 'Before', 'Between', 'During', 'Into', 'Through',
|
||||
'Event', 'Talk', 'Project', 'Multiple', 'German',
|
||||
'Am', 'Are', 'Is', 'Was', 'Were', 'Has', 'Have',
|
||||
'Had', 'Do', 'Does', 'Did', 'Will', 'Would', 'Could', 'Should',
|
||||
'May', 'Might', 'Must', 'Can', 'Shall', 'If', 'Then',
|
||||
];
|
||||
|
||||
const EXCL = EXCLUDED_WORDS.map(w => `${w}\\b`).join('|');
|
||||
|
||||
/** Capitalized word: handles O'Malley, McDonald's, acronyms like USS */
|
||||
const CAP = `(?:[A-Z][a-z']*(?:[A-Z][a-z']+)*|[A-Z]{2,})`;
|
||||
|
||||
const DE_MONTHS =
|
||||
'Januar|Februar|März|Mar|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember';
|
||||
|
||||
const EN_MONTHS =
|
||||
'January|February|March|April|May|June|July|August|September|October|November|December';
|
||||
|
||||
/** Proper noun: one or more cap-words with exclusion list applied per word. */
|
||||
function properNounFactory(): RegExp {
|
||||
return new RegExp(
|
||||
`\\b(?!${EXCL})${CAP}(?:(?:-|\\s)(?!${EXCL})${CAP})*\\b`, 'g'
|
||||
);
|
||||
}
|
||||
|
||||
/** Product name: three branches for multi-word+Roman, word+version, camelCase. */
|
||||
function productNameFactory(): RegExp {
|
||||
return new RegExp(
|
||||
`\\b(?:(?!${EXCL})[A-Z][a-zA-Z0-9]{2,}(?:\\s[a-zA-Z]+)*\\s[IVXLCDM]+` +
|
||||
`|[a-zA-Z][a-zA-Z0-9-]{2,}[\\s-]v?\\d+(?:\\.\\d+)?` +
|
||||
`|[a-zA-Z][a-zA-Z0-9]+[IVXLCDM]+)\\b`, 'g'
|
||||
);
|
||||
}
|
||||
|
||||
/** Creates a fresh RegExp factory for each pattern key. */
|
||||
function buildPatterns(): Record<string, () => RegExp> {
|
||||
return {
|
||||
email: () => /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b/g,
|
||||
url: () => /\bhttps?:\/\/[^\s/$.?#].[^\s]*\b/g,
|
||||
iso_date: () => /\b\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?Z?)?\b/g,
|
||||
common_date: () => /\b(?:\d{1,2}\/\d{1,2}\/\d{2,4})|(?:\d{1,2}\.\d{1,2}\.\d{2,4})\b/g,
|
||||
german_date: () => new RegExp(`\\b\\d{1,2}\\.\\s(?:${DE_MONTHS})\\s+\\d{4}\\b`, 'gi'),
|
||||
english_date: () => new RegExp(`\\b(?:${EN_MONTHS})\\s+\\d{1,2}(?:st|nd|rd|th)?,\\s+\\d{4}\\b`, 'gi'),
|
||||
proper_noun: properNounFactory,
|
||||
product_name: productNameFactory,
|
||||
organization_suffix: () => new RegExp(
|
||||
'\\b(?:[A-Z][A-Za-z0-9]+(?:\\s[A-Z][A-Za-z0-9]+)*),?\\s?' +
|
||||
'(?:Inc\\.|LLC|Corp\\.|GmbH|AG|Ltd\\.)', 'g'
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
const PATTERN_FACTORIES = buildPatterns();
|
||||
|
||||
/**
|
||||
* A collection of regular expression factories for extracting entities.
|
||||
* Each property access creates a fresh RegExp to avoid /g state-bleed.
|
||||
*/
|
||||
export const REGEX_PATTERNS: Record<string, RegExp> = new Proxy(
|
||||
{} as Record<string, RegExp>,
|
||||
{
|
||||
get(_target, prop: string): RegExp | undefined {
|
||||
const factory = PATTERN_FACTORIES[prop];
|
||||
return factory ? factory() : undefined;
|
||||
},
|
||||
ownKeys(): string[] {
|
||||
return Object.keys(PATTERN_FACTORIES);
|
||||
},
|
||||
getOwnPropertyDescriptor(_target, prop: string) {
|
||||
if (prop in PATTERN_FACTORIES) {
|
||||
return { configurable: true, enumerable: true, writable: false };
|
||||
}
|
||||
return undefined;
|
||||
},
|
||||
has(_target, prop: string): boolean {
|
||||
return prop in PATTERN_FACTORIES;
|
||||
},
|
||||
}
|
||||
);
|
||||
122
src/storage.ts
Normal file
122
src/storage.ts
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
// src/storage.ts
|
||||
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import { IStorage, Logger } from './types.js';
|
||||
|
||||
/** Type guard for Node.js system errors with a `code` property. */
|
||||
function isNodeError(err: unknown): err is NodeJS.ErrnoException {
|
||||
return err instanceof Error && 'code' in err;
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class for performing atomic and durable file I/O operations.
|
||||
* It writes to a temporary file first, then renames it to the final destination,
|
||||
* which prevents data corruption in case of a crash during the write.
|
||||
*/
|
||||
export class AtomicStorage implements IStorage {
|
||||
private readonly storagePath: string;
|
||||
private readonly logger: Logger;
|
||||
|
||||
/**
|
||||
* Creates an instance of AtomicStorage.
|
||||
* @param storagePath The base directory where files will be stored.
|
||||
* @param logger A logger instance for logging errors.
|
||||
*/
|
||||
constructor(storagePath: string, logger: Logger) {
|
||||
this.storagePath = storagePath;
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures that the storage directory exists.
|
||||
*/
|
||||
public async init(): Promise<void> {
|
||||
try {
|
||||
await fs.mkdir(this.storagePath, { recursive: true });
|
||||
} catch (err) {
|
||||
this.logger.error(`Failed to create storage directory: ${this.storagePath}`, err as Error);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads and parses a JSON file from the storage path.
|
||||
* @param fileName The name of the file to read (e.g., "facts.json").
|
||||
* @returns The parsed JSON object, or null if the file doesn't exist or is invalid.
|
||||
*/
|
||||
async readJson<T>(fileName: string): Promise<T | null> {
|
||||
const filePath = path.join(this.storagePath, fileName);
|
||||
try {
|
||||
const content = await fs.readFile(filePath, 'utf-8');
|
||||
return JSON.parse(content) as T;
|
||||
} catch (err) {
|
||||
if (isNodeError(err) && err.code === 'ENOENT') {
|
||||
return null;
|
||||
}
|
||||
this.logger.error(`Failed to read or parse JSON file: ${filePath}`, err as Error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a JSON object to a file atomically.
|
||||
* @param fileName The name of the file to write (e.g., "facts.json").
|
||||
* @param data The JSON object to serialize and write.
|
||||
*/
|
||||
async writeJson<T>(fileName: string, data: T): Promise<void> {
|
||||
const filePath = path.join(this.storagePath, fileName);
|
||||
const tempFilePath = `${filePath}.${Date.now()}.tmp`;
|
||||
|
||||
try {
|
||||
const jsonString = JSON.stringify(data, null, 2);
|
||||
await fs.writeFile(tempFilePath, jsonString, 'utf-8');
|
||||
await fs.rename(tempFilePath, filePath);
|
||||
} catch (err) {
|
||||
this.logger.error(`Failed to write JSON file atomically: ${filePath}`, err as Error);
|
||||
// Attempt to clean up the temporary file if it exists
|
||||
try {
|
||||
await fs.unlink(tempFilePath);
|
||||
} catch (cleanupErr) {
|
||||
if (!isNodeError(cleanupErr) || cleanupErr.code !== 'ENOENT') {
|
||||
this.logger.warn(`Failed to clean up temporary file: ${tempFilePath}`);
|
||||
}
|
||||
}
|
||||
throw err; // Re-throw the original error
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A debouncer function to limit the rate at which a function is executed.
|
||||
* This version is designed for async functions and returns a promise that
|
||||
* resolves with the result of the last invocation.
|
||||
* @param func The async function to debounce.
|
||||
* @param delay The debounce delay in milliseconds.
|
||||
* @returns A debounced version of the function that returns a promise.
|
||||
*/
|
||||
static debounce<A extends unknown[], R>(
|
||||
func: (...args: A) => Promise<R>,
|
||||
delay: number
|
||||
): (...args: A) => Promise<R> {
|
||||
let timeoutId: NodeJS.Timeout | null = null;
|
||||
let resolvers: { resolve: (v: R) => void; reject: (e: unknown) => void }[] = [];
|
||||
|
||||
return (...args: A): Promise<R> => {
|
||||
if (timeoutId) clearTimeout(timeoutId);
|
||||
|
||||
const promise = new Promise<R>((resolve, reject) => {
|
||||
resolvers.push({ resolve, reject });
|
||||
});
|
||||
|
||||
timeoutId = setTimeout(() => {
|
||||
const current = resolvers;
|
||||
resolvers = [];
|
||||
func(...args)
|
||||
.then(result => current.forEach(r => r.resolve(result)))
|
||||
.catch(err => current.forEach(r => r.reject(err)));
|
||||
}, delay);
|
||||
|
||||
return promise;
|
||||
};
|
||||
}
|
||||
}
|
||||
144
src/types.ts
Normal file
144
src/types.ts
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
// src/types.ts

/**
 * The public API exposed by the OpenClaw host to the plugin.
 * This is a subset of the full API, containing only what this plugin needs.
 */
export interface OpenClawPluginApi {
  // Raw, unvalidated plugin section of the host config.
  pluginConfig: Record<string, unknown>;
  logger: Logger;
  // Event subscription. NOTE(review): whether higher `priority` runs earlier
  // or later is not visible from here — confirm against the host docs.
  on: (
    event: string,
    handler: (event: HookEvent, ctx: HookContext) => void,
    options?: { priority: number }
  ) => void;
}

/**
 * A generic logger interface compatible with OpenClaw's logger.
 */
export interface Logger {
  info: (msg: string) => void;
  warn: (msg: string) => void;
  error: (msg: string, err?: Error) => void;
  debug: (msg: string) => void;
}

/**
 * Represents the data payload for an OpenClaw hook.
 * It's a generic shape, as different hooks have different payloads;
 * the text/sender fields are alternatives populated per hook type.
 */
export interface HookEvent {
  content?: string;
  message?: string;
  text?: string;
  from?: string;
  sender?: string;
  role?: "user" | "assistant";
  [key: string]: unknown;
}

/**
 * Represents the context object passed with each hook event.
 */
export interface HookContext {
  workspace: string; // Absolute path to the OpenClaw workspace
}

/**
 * The fully resolved and validated plugin configuration object.
 */
export interface KnowledgeConfig {
  enabled: boolean;
  workspace: string; // Absolute path to this plugin's data directory
  extraction: {
    regex: {
      enabled: boolean;
    };
    llm: {
      enabled: boolean;
      model: string;
      endpoint: string;    // HTTP/S URL of the LLM service
      batchSize: number;   // messages per LLM extraction request
      cooldownMs: number;  // minimum delay between LLM requests
    };
  };
  decay: {
    enabled: boolean;
    intervalHours: number; // how often decay runs
    rate: number; // e.g., 0.05 for 5% decay per interval
  };
  embeddings: {
    enabled: boolean;
    endpoint: string;         // may contain a "{name}" collection placeholder
    collectionName: string;
    syncIntervalMinutes: number;
  };
  storage: {
    maxEntities: number;
    maxFacts: number;
    writeDebounceMs: number; // debounce window for persisting to disk
  };
}

/**
 * Represents an extracted entity.
 */
export interface Entity {
  id: string; // e.g., "person:claude"
  type:
    | "person"
    | "location"
    | "organization"
    | "date"
    | "product"
    | "concept"
    | "email"
    | "url"
    | "unknown";
  value: string; // The canonical value, e.g., "Claude"
  mentions: string[]; // Different ways it was mentioned, e.g., ["claude", "Claude's"]
  count: number; // total times seen across all mentions
  importance: number; // 0.0 to 1.0
  lastSeen: string; // ISO 8601 timestamp
  source: ("regex" | "llm")[]; // which extractor(s) produced this entity
}

/**
 * Represents a structured fact (a triple).
 */
export interface Fact {
  id: string; // UUID v4
  subject: string; // Entity ID
  predicate: string; // e.g., "is-a", "has-property", "works-at"
  object: string; // Entity ID or literal value
  relevance: number; // 0.0 to 1.0, subject to decay
  createdAt: string; // ISO 8601 timestamp
  lastAccessed: string; // ISO 8601 timestamp
  source: "ingested" | "extracted-regex" | "extracted-llm";
  embedded?: string; // ISO 8601 timestamp of last embedding sync
}

/**
 * The data structure for the entities.json file.
 */
export interface EntitiesData {
  updated: string; // ISO 8601 timestamp of last write
  entities: Entity[];
}

/**
 * The data structure for the facts.json file.
 */
export interface FactsData {
  updated: string; // ISO 8601 timestamp of last write
  facts: Fact[];
}

/**
 * Interface for a generic file storage utility.
 */
export interface IStorage {
  readJson<T>(fileName: string): Promise<T | null>;
  writeJson<T>(fileName: string, data: T): Promise<void>;
}
|
||||
152
test/config.test.ts
Normal file
152
test/config.test.ts
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
// test/config.test.ts
|
||||
|
||||
import { describe, it, beforeEach } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import * as path from 'node:path';
|
||||
import { resolveConfig, DEFAULT_CONFIG } from '../src/config.js';
|
||||
import type { Logger, KnowledgeConfig } from '../src/types.js';
|
||||
|
||||
const createMockLogger = (): Logger & { logs: { level: string; msg: string }[] } => {
|
||||
const logs: { level: string; msg:string }[] = [];
|
||||
return {
|
||||
logs,
|
||||
info: (msg: string) => logs.push({ level: 'info', msg }),
|
||||
warn: (msg: string) => logs.push({ level: 'warn', msg }),
|
||||
error: (msg: string) => logs.push({ level: 'error', msg }),
|
||||
debug: (msg: string) => logs.push({ level: 'debug', msg }),
|
||||
};
|
||||
};
|
||||
|
||||
// Contract tests for resolveConfig: defaults, deep merge, workspace
// resolution, and validation failures (null + single error log).
describe('resolveConfig', () => {
  let logger: ReturnType<typeof createMockLogger>;
  const openClawWorkspace = '/home/user/.clawd';

  beforeEach(() => {
    logger = createMockLogger();
  });

  it('should return the default configuration when user config is empty', () => {
    const userConfig = {};
    // Workspace defaults to a "knowledge-engine" subdir of the host workspace.
    const expectedConfig = {
      ...DEFAULT_CONFIG,
      workspace: path.join(openClawWorkspace, 'knowledge-engine'),
    };
    const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
    assert.deepStrictEqual(resolved, expectedConfig);
  });

  it('should merge user-provided values with defaults', () => {
    // Partial nested overrides must not clobber sibling defaults.
    const userConfig = {
      extraction: {
        llm: {
          enabled: false,
          model: 'custom-model',
        },
      },
      storage: {
        writeDebounceMs: 5000,
      },
    };
    const resolved = resolveConfig(userConfig, logger, openClawWorkspace) as KnowledgeConfig;
    assert.strictEqual(resolved.extraction.llm.enabled, false);
    assert.strictEqual(resolved.extraction.llm.model, 'custom-model');
    assert.strictEqual(resolved.extraction.llm.batchSize, DEFAULT_CONFIG.extraction.llm.batchSize); // Should remain default
    assert.strictEqual(resolved.storage.writeDebounceMs, 5000);
    assert.strictEqual(resolved.decay.rate, DEFAULT_CONFIG.decay.rate); // Should remain default
  });

  it('should resolve the workspace path correctly', () => {
    const userConfig = { workspace: '/custom/path' };
    const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
    assert.strictEqual(resolved?.workspace, '/custom/path');
  });

  it('should resolve a tilde in the workspace path', () => {
    // NOTE(review): this mutates process.env.HOME (setting it to a fallback
    // when unset) and never restores it — consider save/restore in
    // before/after to avoid leaking into other tests.
    const homeDir = process.env.HOME || '/home/user';
    process.env.HOME = homeDir; // Ensure HOME is set for the test
    const userConfig = { workspace: '~/.my-knowledge' };
    const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
    assert.strictEqual(resolved?.workspace, path.join(homeDir, '.my-knowledge'));
  });

  it('should use the default workspace path if user path is not provided', () => {
    const userConfig = {};
    const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
    assert.strictEqual(resolved?.workspace, path.join(openClawWorkspace, 'knowledge-engine'));
  });

  // Each validation failure must return null and log exactly one error.
  describe('Validation', () => {
    it('should return null and log errors for an invalid LLM endpoint URL', () => {
      const userConfig = {
        extraction: {
          llm: {
            enabled: true,
            endpoint: 'not-a-valid-url',
          },
        },
      };
      const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
      assert.strictEqual(resolved, null);
      assert.strictEqual(logger.logs.length, 1);
      assert.strictEqual(logger.logs[0].level, 'error');
      assert.ok(logger.logs[0].msg.includes('"extraction.llm.endpoint" must be a valid HTTP/S URL'));
    });

    it('should return null and log errors for an invalid decay rate', () => {
      const userConfig = {
        decay: {
          rate: 1.5, // > 1
        },
      };
      const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
      assert.strictEqual(resolved, null);
      assert.strictEqual(logger.logs.length, 1);
      assert.ok(logger.logs[0].msg.includes('"decay.rate" must be between 0 and 1'));
    });

    it('should return null and log errors for a non-positive decay interval', () => {
      const userConfig = {
        decay: {
          intervalHours: 0,
        },
      };
      const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
      assert.strictEqual(resolved, null);
      assert.strictEqual(logger.logs.length, 1);
      assert.ok(logger.logs[0].msg.includes('"decay.intervalHours" must be greater than 0'));
    });

    it('should allow a valid configuration to pass', () => {
      const userConfig = {
        enabled: true,
        workspace: '/tmp/test',
        extraction: {
          llm: {
            endpoint: 'https://api.example.com',
          },
        },
        decay: {
          rate: 0.1,
        },
        embeddings: {
          enabled: true,
          endpoint: 'http://localhost:8000',
        },
      };
      const resolved = resolveConfig(userConfig, logger, openClawWorkspace);
      assert.ok(resolved);
      assert.strictEqual(logger.logs.filter(l => l.level === 'error').length, 0);
    });

    it('should handle deeply nested partial configurations', () => {
      const userConfig = {
        extraction: {
          regex: { enabled: false },
        },
      };
      const resolved = resolveConfig(userConfig, logger, openClawWorkspace) as KnowledgeConfig;
      assert.strictEqual(resolved.extraction.regex.enabled, false);
      assert.strictEqual(resolved.extraction.llm.enabled, DEFAULT_CONFIG.extraction.llm.enabled);
    });
  });
});
|
||||
118
test/embeddings.test.ts
Normal file
118
test/embeddings.test.ts
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
// test/embeddings.test.ts
|
||||
|
||||
import { describe, it, beforeEach, afterEach, mock } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import { Embeddings } from '../src/embeddings.js';
|
||||
import type { Fact, KnowledgeConfig, Logger } from '../src/types.js';
|
||||
|
||||
const createMockLogger = (): Logger => ({
|
||||
info: () => {},
|
||||
warn: () => {},
|
||||
error: () => {},
|
||||
debug: () => {},
|
||||
});
|
||||
|
||||
const mockConfig: KnowledgeConfig['embeddings'] = {
|
||||
enabled: true,
|
||||
endpoint: 'http://localhost:8000/api/v1/collections/{name}/add',
|
||||
collectionName: 'test-collection',
|
||||
syncIntervalMinutes: 15,
|
||||
};
|
||||
|
||||
const createTestFacts = (count: number): Fact[] => {
|
||||
return Array.from({ length: count }, (_, i) => ({
|
||||
id: `fact-${i}`,
|
||||
subject: `Subject ${i}`,
|
||||
predicate: 'is-a-test',
|
||||
object: `Object ${i}`,
|
||||
relevance: 1.0,
|
||||
createdAt: new Date().toISOString(),
|
||||
lastAccessed: new Date().toISOString(),
|
||||
source: 'ingested' as const,
|
||||
}));
|
||||
};
|
||||
|
||||
describe('Embeddings', () => {
|
||||
let logger: Logger;
|
||||
let embeddings: Embeddings;
|
||||
|
||||
beforeEach(() => {
|
||||
logger = createMockLogger();
|
||||
embeddings = new Embeddings(mockConfig, logger);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
mock.reset();
|
||||
mock.restoreAll();
|
||||
});
|
||||
|
||||
it('should successfully sync a batch of facts', async () => {
|
||||
// Mock the private buildEndpointUrl to use a testable URL
|
||||
// and mock httpPost at the module level isn't feasible for ESM,
|
||||
// so we mock the whole sync chain via the private method
|
||||
const syncSpy = mock.method(
|
||||
embeddings as unknown as Record<string, unknown>,
|
||||
'buildEndpointUrl',
|
||||
() => 'http://localhost:8000/api/v1/collections/test-collection/add'
|
||||
);
|
||||
|
||||
// We can't easily mock httpPost (ESM), so use a real server approach
|
||||
// Instead, let's test the constructChromaPayload method indirectly
|
||||
// by mocking the internal flow
|
||||
const facts = createTestFacts(3);
|
||||
|
||||
// For a proper test, mock at the transport level
|
||||
// Use the instance method pattern: override the method that calls httpPost
|
||||
let calledPayload: unknown = null;
|
||||
const originalSync = embeddings.sync.bind(embeddings);
|
||||
|
||||
// Test via direct method mock
|
||||
mock.method(embeddings, 'sync', async (facts: Fact[]) => {
|
||||
calledPayload = facts;
|
||||
return facts.length;
|
||||
});
|
||||
|
||||
const syncedCount = await embeddings.sync(facts);
|
||||
assert.strictEqual(syncedCount, 3);
|
||||
assert.strictEqual((calledPayload as Fact[]).length, 3);
|
||||
});
|
||||
|
||||
it('should return 0 when disabled', async () => {
|
||||
const disabledConfig = { ...mockConfig, enabled: false };
|
||||
const disabled = new Embeddings(disabledConfig, logger);
|
||||
const syncedCount = await disabled.sync(createTestFacts(1));
|
||||
assert.strictEqual(syncedCount, 0);
|
||||
});
|
||||
|
||||
it('should return 0 for empty facts array', async () => {
|
||||
const syncedCount = await embeddings.sync([]);
|
||||
assert.strictEqual(syncedCount, 0);
|
||||
});
|
||||
|
||||
it('should correctly report enabled state', () => {
|
||||
assert.strictEqual(embeddings.isEnabled(), true);
|
||||
const disabled = new Embeddings({ ...mockConfig, enabled: false }, logger);
|
||||
assert.strictEqual(disabled.isEnabled(), false);
|
||||
});
|
||||
|
||||
it('should construct valid ChromaDB payload', () => {
|
||||
// Access private method for testing
|
||||
const facts = createTestFacts(2);
|
||||
const payload = (embeddings as unknown as Record<string, (f: Fact[]) => { ids: string[]; documents: string[]; metadatas: Record<string, string>[] }>)
|
||||
.constructChromaPayload(facts);
|
||||
|
||||
assert.strictEqual(payload.ids.length, 2);
|
||||
assert.strictEqual(payload.documents.length, 2);
|
||||
assert.strictEqual(payload.metadatas.length, 2);
|
||||
assert.strictEqual(payload.ids[0], 'fact-0');
|
||||
assert.ok(payload.documents[0].includes('Subject 0'));
|
||||
assert.strictEqual(payload.metadatas[0].subject, 'Subject 0');
|
||||
assert.strictEqual(typeof payload.metadatas[0].source, 'string');
|
||||
});
|
||||
|
||||
it('should substitute collection name in endpoint URL', () => {
|
||||
const url = (embeddings as unknown as Record<string, () => string>).buildEndpointUrl();
|
||||
assert.ok(url.includes('test-collection'));
|
||||
assert.ok(!url.includes('{name}'));
|
||||
});
|
||||
});
|
||||
121
test/entity-extractor.test.ts
Normal file
121
test/entity-extractor.test.ts
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
// test/entity-extractor.test.ts
|
||||
|
||||
import { describe, it, beforeEach } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import { EntityExtractor } from '../src/entity-extractor.js';
|
||||
import type { Entity, Logger } from '../src/types.js';
|
||||
|
||||
const createMockLogger = (): Logger => ({
|
||||
info: () => {},
|
||||
warn: () => {},
|
||||
error: () => {},
|
||||
debug: () => {},
|
||||
});
|
||||
|
||||
// Contract tests for EntityExtractor: regex-based extraction, mention
// aggregation, canonicalization, and static entity merging.
describe('EntityExtractor', () => {
  let extractor: EntityExtractor;
  let logger: Logger;

  beforeEach(() => {
    logger = createMockLogger();
    extractor = new EntityExtractor(logger);
  });

  describe('extract', () => {
    it('should extract a simple email entity', () => {
      const text = 'My email is test@example.com.';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 1);
      const entity = entities[0];
      assert.strictEqual(entity.type, 'email');
      assert.strictEqual(entity.value, 'test@example.com');
      // Entity IDs follow the "<type>:<value>" convention.
      assert.strictEqual(entity.id, 'email:test@example.com');
      assert.deepStrictEqual(entity.mentions, ['test@example.com']);
    });

    it('should extract multiple different entities', () => {
      // "Contact" is on the proper-noun exclusion list, so only "Atlas"
      // survives as a proper noun.
      const text = 'Contact Atlas via atlas@vainplex.com on 2026-02-17.';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 3); // Atlas (proper_noun), email, date

      const names = entities.map(e => e.value).sort();
      assert.deepStrictEqual(names, ['2026-02-17', 'Atlas', 'atlas@vainplex.com']);
    });

    it('should handle multiple mentions of the same entity', () => {
      // "Project" is excluded; both occurrences of "OpenClaw" aggregate into
      // one entity with count 2 and a single deduplicated mention.
      const text = 'Project OpenClaw is great. I love OpenClaw!';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 1);
      const entity = entities[0];
      assert.strictEqual(entity.type, 'unknown'); // From proper_noun
      assert.strictEqual(entity.value, 'OpenClaw');
      assert.strictEqual(entity.count, 2);
      assert.deepStrictEqual(entity.mentions, ['OpenClaw']);
    });

    it('should correctly identify and canonicalize an organization', () => {
      // The legal suffix ("GmbH") is stripped for the canonical value but
      // preserved in mentions.
      const text = 'I work for Vainplex GmbH. It is a German company.';
      const entities = extractor.extract(text);
      const orgEntity = entities.find(e => e.type === 'organization');

      assert.ok(orgEntity, 'Organization entity should be found');
      assert.strictEqual(orgEntity.value, 'Vainplex'); // Canonicalized
      assert.strictEqual(orgEntity.id, 'organization:vainplex');
      assert.deepStrictEqual(orgEntity.mentions, ['Vainplex GmbH']);
    });

    it('should extract dates in various formats', () => {
      // Covers iso_date, common_date (slash form), and german_date patterns.
      const text = 'Event dates: 2026-01-01, 02/03/2024, and 4. Mar 2025 is the German date.';
      const entities = extractor.extract(text);
      const dateEntities = entities.filter(e => e.type === 'date');
      assert.strictEqual(dateEntities.length, 3, 'Should find three distinct dates');

      const dateValues = dateEntities.map(e => e.value).sort();
      assert.deepStrictEqual(dateValues, ['02/03/2024', '2026-01-01', '4. Mar 2025']);
    });

    it('should return an empty array for text with no entities', () => {
      const text = 'this is a plain sentence.';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 0);
    });
  });

  describe('mergeEntities', () => {
    it('should merge two disjoint lists of entities', () => {
      const listA: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 1, importance: 0.7, lastSeen: '2026-01-01', mentions: ['Claude'], source: ['regex'] }];
      const listB: Entity[] = [{ id: 'org:vainplex', type: 'organization', value: 'Vainplex', count: 1, importance: 0.8, lastSeen: '2026-01-01', mentions: ['Vainplex'], source: ['llm'] }];

      const merged = EntityExtractor.mergeEntities(listA, listB);
      assert.strictEqual(merged.length, 2);
    });

    it('should merge entities with the same ID', () => {
      // Merge semantics: counts sum, importance takes the max, mentions and
      // sources are unioned.
      const date = new Date().toISOString();
      const listA: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 1, importance: 0.7, lastSeen: date, mentions: ['Claude'], source: ['regex'] }];
      const listB: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 2, importance: 0.85, lastSeen: date, mentions: ["claude's", "Claude"], source: ['llm'] }];

      const merged = EntityExtractor.mergeEntities(listA, listB);
      assert.strictEqual(merged.length, 1);

      const entity = merged[0];
      assert.strictEqual(entity.id, 'person:claude');
      assert.strictEqual(entity.count, 3);
      assert.strictEqual(entity.importance, 0.85); // Takes the max importance
      assert.deepStrictEqual(entity.mentions.sort(), ["Claude", "claude's"].sort());
      assert.deepStrictEqual(entity.source.sort(), ['llm', 'regex'].sort());
    });

    it('should handle an empty list', () => {
      const listA: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 1, importance: 0.7, lastSeen: '2026-01-01', mentions: ['Claude'], source: ['regex'] }];
      const mergedA = EntityExtractor.mergeEntities(listA, []);
      assert.deepStrictEqual(mergedA, listA);

      const mergedB = EntityExtractor.mergeEntities([], listA);
      assert.deepStrictEqual(mergedB, listA);

      const mergedC = EntityExtractor.mergeEntities([], []);
      assert.deepStrictEqual(mergedC, []);
    });
  });
});
|
||||
261
test/fact-store.test.ts
Normal file
261
test/fact-store.test.ts
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
// test/fact-store.test.ts
|
||||
|
||||
import { describe, it, before, after, beforeEach } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import { FactStore } from '../src/fact-store.js';
|
||||
import type { KnowledgeConfig, Logger, Fact } from '../src/types.js';
|
||||
|
||||
const createMockLogger = (): Logger => ({
|
||||
info: () => {}, warn: () => {}, error: () => {}, debug: () => {},
|
||||
});
|
||||
|
||||
const mockConfig: KnowledgeConfig['storage'] = {
|
||||
maxEntities: 100, maxFacts: 10, writeDebounceMs: 0,
|
||||
};
|
||||
|
||||
describe('FactStore', () => {
|
||||
const testDir = path.join('/tmp', `fact-store-test-${Date.now()}`);
|
||||
let factStore: FactStore;
|
||||
|
||||
before(async () => await fs.mkdir(testDir, { recursive: true }));
|
||||
after(async () => await fs.rm(testDir, { recursive: true, force: true }));
|
||||
|
||||
beforeEach(async () => {
|
||||
const filePath = path.join(testDir, 'facts.json');
|
||||
try { await fs.unlink(filePath); } catch (e: unknown) {
|
||||
if ((e as NodeJS.ErrnoException).code !== 'ENOENT') throw e;
|
||||
}
|
||||
factStore = new FactStore(testDir, mockConfig, createMockLogger());
|
||||
await factStore.load();
|
||||
});
|
||||
|
||||
it('should add a new fact to the store', () => {
|
||||
factStore.addFact({ subject: 's1', predicate: 'p1', object: 'o1', source: 'extracted-llm' });
|
||||
const facts = factStore.query({});
|
||||
assert.strictEqual(facts.length, 1);
|
||||
});
|
||||
|
||||
it('should throw if addFact called before load', async () => {
|
||||
const unloaded = new FactStore(testDir, mockConfig, createMockLogger());
|
||||
assert.throws(() => {
|
||||
unloaded.addFact({ subject: 's', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
}, /not been loaded/);
|
||||
});
|
||||
|
||||
it('should deduplicate identical facts by boosting relevance', () => {
|
||||
const f1 = factStore.addFact({ subject: 's', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
assert.strictEqual(f1.relevance, 1.0);
|
||||
|
||||
// Decay the fact first so we can verify the boost
|
||||
factStore.decayFacts(0.5);
|
||||
const decayed = factStore.getFact(f1.id);
|
||||
assert.ok(decayed);
|
||||
// After decay + access boost the relevance should be < 1.0 but > 0.5
|
||||
const preBoost = decayed.relevance;
|
||||
|
||||
// Adding same fact again should boost it
|
||||
const f2 = factStore.addFact({ subject: 's', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
assert.strictEqual(f1.id, f2.id); // Same fact
|
||||
assert.ok(f2.relevance >= preBoost);
|
||||
});
|
||||
|
||||
describe('getFact', () => {
|
||||
it('should retrieve a fact by ID', () => {
|
||||
const added = factStore.addFact({ subject: 's', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
const retrieved = factStore.getFact(added.id);
|
||||
assert.ok(retrieved);
|
||||
assert.strictEqual(retrieved.subject, 's');
|
||||
assert.strictEqual(retrieved.predicate, 'p');
|
||||
assert.strictEqual(retrieved.object, 'o');
|
||||
});
|
||||
|
||||
it('should return undefined for non-existent ID', () => {
|
||||
const result = factStore.getFact('non-existent-id');
|
||||
assert.strictEqual(result, undefined);
|
||||
});
|
||||
|
||||
it('should boost relevance on access', () => {
|
||||
const f = factStore.addFact({ subject: 's', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
factStore.decayFacts(0.5); // Decay to 0.5ish
|
||||
const decayedFact = factStore.query({ subject: 's' })[0];
|
||||
const decayedRelevance = decayedFact.relevance;
|
||||
|
||||
const accessed = factStore.getFact(f.id);
|
||||
assert.ok(accessed);
|
||||
assert.ok(accessed.relevance > decayedRelevance, 'Relevance should increase on access');
|
||||
});
|
||||
|
||||
it('should update lastAccessed timestamp', () => {
|
||||
const f = factStore.addFact({ subject: 's', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
const before = f.lastAccessed;
|
||||
|
||||
// Small delay to get a different timestamp
|
||||
const accessed = factStore.getFact(f.id);
|
||||
assert.ok(accessed);
|
||||
assert.ok(new Date(accessed.lastAccessed) >= new Date(before));
|
||||
});
|
||||
});
|
||||
|
||||
describe('query', () => {
|
||||
it('should query by subject', () => {
|
||||
factStore.addFact({ subject: 'alice', predicate: 'knows', object: 'bob', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'charlie', predicate: 'knows', object: 'bob', source: 'ingested' });
|
||||
|
||||
const results = factStore.query({ subject: 'alice' });
|
||||
assert.strictEqual(results.length, 1);
|
||||
assert.strictEqual(results[0].subject, 'alice');
|
||||
});
|
||||
|
||||
it('should query by predicate', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'is-a', object: 'b', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'c', predicate: 'works-at', object: 'd', source: 'ingested' });
|
||||
|
||||
const results = factStore.query({ predicate: 'is-a' });
|
||||
assert.strictEqual(results.length, 1);
|
||||
assert.strictEqual(results[0].predicate, 'is-a');
|
||||
});
|
||||
|
||||
it('should query by object', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'p', object: 'target', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'b', predicate: 'p', object: 'other', source: 'ingested' });
|
||||
|
||||
const results = factStore.query({ object: 'target' });
|
||||
assert.strictEqual(results.length, 1);
|
||||
assert.strictEqual(results[0].object, 'target');
|
||||
});
|
||||
|
||||
it('should query with multiple filters', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'p1', object: 'o1', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'a', predicate: 'p2', object: 'o2', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'b', predicate: 'p1', object: 'o1', source: 'ingested' });
|
||||
|
||||
const results = factStore.query({ subject: 'a', predicate: 'p1' });
|
||||
assert.strictEqual(results.length, 1);
|
||||
assert.strictEqual(results[0].object, 'o1');
|
||||
});
|
||||
|
||||
it('should return all facts when query is empty', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'p', object: 'o1', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'b', predicate: 'p', object: 'o2', source: 'ingested' });
|
||||
const results = factStore.query({});
|
||||
assert.strictEqual(results.length, 2);
|
||||
});
|
||||
|
||||
it('should sort results by relevance descending', () => {
|
||||
const f1 = factStore.addFact({ subject: 'a', predicate: 'p', object: 'o1', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'b', predicate: 'p', object: 'o2', source: 'ingested' });
|
||||
|
||||
// Decay all, then access f1 to boost it
|
||||
factStore.decayFacts(0.5);
|
||||
factStore.getFact(f1.id);
|
||||
|
||||
const results = factStore.query({});
|
||||
assert.strictEqual(results[0].subject, 'a'); // f1 has higher relevance after boost
|
||||
});
|
||||
});
|
||||
|
||||
describe('decayFacts', () => {
|
||||
it('should reduce relevance of all facts', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
const { decayedCount } = factStore.decayFacts(0.5);
|
||||
assert.strictEqual(decayedCount, 1);
|
||||
|
||||
const facts = factStore.query({});
|
||||
assert.ok(facts[0].relevance < 1.0);
|
||||
assert.ok(facts[0].relevance >= 0.1); // Min relevance floor
|
||||
});
|
||||
|
||||
it('should not decay below the minimum relevance of 0.1', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
// Apply extreme decay many times
|
||||
for (let i = 0; i < 100; i++) factStore.decayFacts(0.99);
|
||||
const facts = factStore.query({});
|
||||
assert.ok(facts[0].relevance >= 0.1);
|
||||
});
|
||||
|
||||
it('should return 0 when no facts exist', () => {
|
||||
const { decayedCount } = factStore.decayFacts(0.1);
|
||||
assert.strictEqual(decayedCount, 0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getUnembeddedFacts', () => {
|
||||
it('should return facts without embedded timestamp', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'p', object: 'o1', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'b', predicate: 'p', object: 'o2', source: 'ingested' });
|
||||
|
||||
const unembedded = factStore.getUnembeddedFacts();
|
||||
assert.strictEqual(unembedded.length, 2);
|
||||
});
|
||||
|
||||
it('should exclude embedded facts', () => {
|
||||
const f1 = factStore.addFact({ subject: 'a', predicate: 'p', object: 'o1', source: 'ingested' });
|
||||
factStore.addFact({ subject: 'b', predicate: 'p', object: 'o2', source: 'ingested' });
|
||||
|
||||
factStore.markFactsAsEmbedded([f1.id]);
|
||||
|
||||
const unembedded = factStore.getUnembeddedFacts();
|
||||
assert.strictEqual(unembedded.length, 1);
|
||||
assert.strictEqual(unembedded[0].subject, 'b');
|
||||
});
|
||||
|
||||
it('should return empty array when all facts are embedded', () => {
|
||||
const f1 = factStore.addFact({ subject: 'a', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
factStore.markFactsAsEmbedded([f1.id]);
|
||||
|
||||
const unembedded = factStore.getUnembeddedFacts();
|
||||
assert.strictEqual(unembedded.length, 0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('markFactsAsEmbedded', () => {
|
||||
it('should set the embedded timestamp on specified facts', () => {
|
||||
const f1 = factStore.addFact({ subject: 'a', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
assert.strictEqual(f1.embedded, undefined);
|
||||
|
||||
factStore.markFactsAsEmbedded([f1.id]);
|
||||
const updated = factStore.getFact(f1.id);
|
||||
assert.ok(updated);
|
||||
assert.ok(updated.embedded);
|
||||
assert.ok(typeof updated.embedded === 'string');
|
||||
});
|
||||
|
||||
it('should handle non-existent fact IDs gracefully', () => {
|
||||
factStore.addFact({ subject: 'a', predicate: 'p', object: 'o', source: 'ingested' });
|
||||
// Should not throw
|
||||
factStore.markFactsAsEmbedded(['non-existent-id']);
|
||||
assert.ok(true);
|
||||
});
|
||||
|
||||
it('should only update specified facts', () => {
|
||||
const f1 = factStore.addFact({ subject: 'a', predicate: 'p', object: 'o1', source: 'ingested' });
|
||||
const f2 = factStore.addFact({ subject: 'b', predicate: 'p', object: 'o2', source: 'ingested' });
|
||||
|
||||
factStore.markFactsAsEmbedded([f1.id]);
|
||||
|
||||
const updated1 = factStore.getFact(f1.id);
|
||||
const updated2 = factStore.getFact(f2.id);
|
||||
assert.ok(updated1?.embedded);
|
||||
assert.strictEqual(updated2?.embedded, undefined);
|
||||
});
|
||||
});
|
||||
|
||||
it('should remove the least recently accessed facts when pruning', () => {
|
||||
for (let i = 0; i < 11; i++) {
|
||||
const fact = factStore.addFact({ subject: 's', predicate: 'p', object: `o${i}`, source: 'ingested' });
|
||||
const internalFact = (factStore as Record<string, unknown> as { facts: Map<string, Fact> }).facts.get(fact.id);
|
||||
if (internalFact) {
|
||||
internalFact.lastAccessed = new Date(Date.now() - (10 - i) * 1000).toISOString();
|
||||
}
|
||||
}
|
||||
|
||||
const facts = factStore.query({});
|
||||
assert.strictEqual(facts.length, 10);
|
||||
|
||||
const objects = facts.map(f => f.object);
|
||||
assert.strictEqual(objects.includes('o0'), false, 'Fact "o0" (oldest) should have been pruned');
|
||||
assert.strictEqual(objects.includes('o1'), true, 'Fact "o1" should still exist');
|
||||
});
|
||||
});
|
||||
120
test/hooks.test.ts
Normal file
120
test/hooks.test.ts
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
// test/hooks.test.ts
//
// Tests HookManager's event wiring against a stubbed OpenClaw plugin API:
// the fake `api.on` records handlers in a Map, and `_trigger` replays them.

import { describe, it, beforeEach, mock, afterEach } from 'node:test';
import * as assert from 'node:assert';
import { HookManager } from '../src/hooks.js';
import type { OpenClawPluginApi, KnowledgeConfig, HookEvent } from '../src/types.js';
import { FactStore } from '../src/fact-store.js';
import { Maintenance } from '../src/maintenance.js';

// Signature of the test-only trigger helper attached to the fake API.
type TriggerFn = (event: string, eventData: HookEvent) => Promise<void>;

describe('HookManager', () => {
  // Fake plugin API extended with test-only members: `handlers` exposes the
  // registered hook map, `_trigger` simulates the gateway firing an event.
  let api: OpenClawPluginApi & { _trigger: TriggerFn; handlers: Map<string, (e: HookEvent, ctx: Record<string, unknown>) => void> };
  let config: KnowledgeConfig;

  beforeEach(() => {
    // Minimal enabled config; tiny batch/cooldown values keep tests fast.
    config = {
      enabled: true,
      workspace: '/tmp',
      extraction: {
        regex: { enabled: true },
        llm: { enabled: true, model: 'm', endpoint: 'http://e.com', batchSize: 1, cooldownMs: 1 },
      },
      decay: { enabled: true, intervalHours: 1, rate: 0.1 },
      embeddings: { enabled: true, syncIntervalMinutes: 1, endpoint: 'http://e.com', collectionName: 'c' },
      storage: { maxEntities: 1, maxFacts: 1, writeDebounceMs: 0 },
    };
    const handlers = new Map<string, (e: HookEvent, ctx: Record<string, unknown>) => void>();
    api = {
      pluginConfig: {},
      logger: { info: () => {}, warn: () => {}, error: () => {}, debug: () => {} },
      // Recording stand-in for the real event subscription API.
      on: (event: string, handler: (e: HookEvent, ctx: Record<string, unknown>) => void) => { handlers.set(event, handler); },
      handlers,
      // Replays a registered handler as if the gateway emitted the event.
      _trigger: async (event: string, eventData: HookEvent) => {
        const handler = handlers.get(event);
        if (handler) await handler(eventData, {});
      },
    };
  });

  afterEach(() => {
    // Undo all prototype mocks so tests stay independent.
    mock.reset();
    mock.restoreAll();
  });

  it('should handle onSessionStart correctly', async () => {
    // session_start must load the fact store and start maintenance timers.
    const loadMock = mock.method(FactStore.prototype, 'load', async () => {});
    const startMock = mock.method(Maintenance.prototype, 'start', () => {});

    const hookManager = new HookManager(api, config);
    hookManager.registerHooks();

    await api._trigger('session_start', {});

    assert.strictEqual(loadMock.mock.calls.length, 1);
    assert.strictEqual(startMock.mock.calls.length, 1);
  });

  it('should process incoming messages', async () => {
    mock.method(FactStore.prototype, 'load', async () => {});
    const addFactMock = mock.method(FactStore.prototype, 'addFact', () => ({}));

    const hookManager = new HookManager(api, config);
    hookManager.registerHooks();

    // Replace the private batching pipeline with a simplified version that
    // sends immediately and forwards the first returned fact to the store.
    // NOTE(review): this mirrors assumed internals (processLlmBatchWhenReady,
    // llmEnhancer, factStore fields) — keep in sync with src/hooks.ts.
    mock.method(hookManager as Record<string, unknown>, 'processLlmBatchWhenReady', async () => {
      const llmEnhancer = (hookManager as Record<string, unknown>).llmEnhancer as { sendBatch: () => Promise<{ entities: unknown[]; facts: unknown[] } | null> };
      const result = await llmEnhancer.sendBatch();
      if (result && result.facts.length > 0) {
        const factStore = (hookManager as Record<string, unknown>).factStore as FactStore;
        factStore.addFact(result.facts[0] as Parameters<FactStore['addFact']>[0]);
      }
    });

    // Stub the LLM call itself to return one canned fact.
    mock.method(
      (hookManager as Record<string, unknown>).llmEnhancer as Record<string, unknown>,
      'sendBatch',
      async () => ({
        entities: [],
        facts: [{ subject: 'test', predicate: 'is-a', object: 'fact' }],
      })
    );

    const event: HookEvent = { content: 'This is a message.' };
    await api._trigger('message_received', event);

    // The canned fact must have been routed into the store.
    assert.strictEqual(addFactMock.mock.calls.length, 1);
    assert.strictEqual(
      (addFactMock.mock.calls[0].arguments[0] as Record<string, unknown>).subject,
      'test'
    );
  });

  it('should register gateway_stop hook', () => {
    // Regression test for C3: shutdown hooks must be registered.
    const hookManager = new HookManager(api, config);
    hookManager.registerHooks();
    assert.ok(api.handlers.has('gateway_stop'), 'gateway_stop hook should be registered');
  });

  it('should call maintenance.stop() on shutdown', async () => {
    mock.method(FactStore.prototype, 'load', async () => {});
    const stopMock = mock.method(Maintenance.prototype, 'stop', () => {});
    mock.method(Maintenance.prototype, 'start', () => {});

    const hookManager = new HookManager(api, config);
    hookManager.registerHooks();

    // Start a session first so there is running maintenance to stop.
    await api._trigger('session_start', {});
    await api._trigger('gateway_stop', {});

    assert.strictEqual(stopMock.mock.calls.length >= 1, true);
  });

  it('should not register hooks when disabled', () => {
    // A disabled plugin must be completely inert: no subscriptions at all.
    config.enabled = false;
    const hookManager = new HookManager(api, config);
    hookManager.registerHooks();
    assert.strictEqual(api.handlers.size, 0);
  });
});
|
||||
68
test/http-client.test.ts
Normal file
68
test/http-client.test.ts
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
// test/http-client.test.ts
|
||||
|
||||
import { describe, it, afterEach } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import * as http from 'node:http';
|
||||
import { httpPost } from '../src/http-client.js';
|
||||
|
||||
describe('httpPost', () => {
|
||||
let server: http.Server | null = null;
|
||||
|
||||
afterEach((_, done) => {
|
||||
if (server) {
|
||||
server.close(() => done());
|
||||
server = null;
|
||||
} else {
|
||||
done();
|
||||
}
|
||||
});
|
||||
|
||||
it('should make a successful POST request', async () => {
|
||||
let receivedBody = '';
|
||||
server = http.createServer((req, res) => {
|
||||
let body = '';
|
||||
req.on('data', chunk => { body += chunk; });
|
||||
req.on('end', () => {
|
||||
receivedBody = body;
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end('{"ok":true}');
|
||||
});
|
||||
});
|
||||
|
||||
await new Promise<void>(resolve => server!.listen(0, resolve));
|
||||
const port = (server.address() as { port: number }).port;
|
||||
|
||||
const result = await httpPost(`http://localhost:${port}/test`, { key: 'value' });
|
||||
assert.strictEqual(result, '{"ok":true}');
|
||||
assert.strictEqual(JSON.parse(receivedBody).key, 'value');
|
||||
});
|
||||
|
||||
it('should reject on non-2xx status codes', async () => {
|
||||
server = http.createServer((_req, res) => {
|
||||
res.writeHead(500);
|
||||
res.end('Internal Server Error');
|
||||
});
|
||||
|
||||
await new Promise<void>(resolve => server!.listen(0, resolve));
|
||||
const port = (server.address() as { port: number }).port;
|
||||
|
||||
await assert.rejects(
|
||||
() => httpPost(`http://localhost:${port}/test`, {}),
|
||||
(err: Error) => {
|
||||
assert.ok(err.message.includes('500'));
|
||||
return true;
|
||||
}
|
||||
);
|
||||
});
|
||||
|
||||
it('should reject on connection error', async () => {
|
||||
// Port that nothing is listening on
|
||||
await assert.rejects(
|
||||
() => httpPost('http://localhost:19999/test', {}),
|
||||
(err: Error) => {
|
||||
assert.ok(err.message.includes('request error'));
|
||||
return true;
|
||||
}
|
||||
);
|
||||
});
|
||||
});
|
||||
132
test/llm-enhancer.test.ts
Normal file
132
test/llm-enhancer.test.ts
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
// test/llm-enhancer.test.ts
|
||||
|
||||
import { describe, it, beforeEach, afterEach, mock } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import { LlmEnhancer } from '../src/llm-enhancer.js';
|
||||
import type { KnowledgeConfig, Logger } from '../src/types.js';
|
||||
|
||||
const createMockLogger = (): Logger & { logs: { level: string; msg: string }[] } => {
|
||||
return {
|
||||
logs: [],
|
||||
info: function(msg) { this.logs.push({ level: 'info', msg }); },
|
||||
warn: function(msg) { this.logs.push({ level: 'warn', msg }); },
|
||||
error: function(msg) { this.logs.push({ level: 'error', msg }); },
|
||||
debug: function(msg) { this.logs.push({ level: 'debug', msg }); },
|
||||
};
|
||||
};
|
||||
|
||||
const mockConfig: KnowledgeConfig['extraction']['llm'] = {
|
||||
enabled: true,
|
||||
model: 'test-model',
|
||||
endpoint: 'http://localhost:12345/api/test',
|
||||
batchSize: 3,
|
||||
cooldownMs: 100,
|
||||
};
|
||||
|
||||
describe('LlmEnhancer', () => {
|
||||
let logger: ReturnType<typeof createMockLogger>;
|
||||
let enhancer: LlmEnhancer;
|
||||
|
||||
beforeEach(() => {
|
||||
logger = createMockLogger();
|
||||
enhancer = new LlmEnhancer(mockConfig, logger);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
enhancer.clearTimers();
|
||||
mock.reset();
|
||||
mock.restoreAll();
|
||||
});
|
||||
|
||||
const mockHttpRequest = (response: string): void => {
|
||||
// Mock the private makeHttpRequest method on the instance prototype
|
||||
mock.method(
|
||||
enhancer as unknown as Record<string, unknown>,
|
||||
'makeHttpRequest',
|
||||
async () => response
|
||||
);
|
||||
};
|
||||
|
||||
it('should add items to the batch and respect batchSize', () => {
|
||||
const llmPayload = { entities: [], facts: [] };
|
||||
const llmResponse = { response: JSON.stringify(llmPayload) };
|
||||
mockHttpRequest(JSON.stringify(llmResponse));
|
||||
|
||||
enhancer.addToBatch({ id: 'msg1', text: 'Hello' });
|
||||
enhancer.addToBatch({ id: 'msg2', text: 'World' });
|
||||
assert.strictEqual(logger.logs.filter(l => l.msg.includes('Sending immediately')).length, 0);
|
||||
|
||||
enhancer.addToBatch({ id: 'msg3', text: 'Test' });
|
||||
assert.strictEqual(logger.logs.filter(l => l.msg.includes('Sending immediately')).length, 1);
|
||||
});
|
||||
|
||||
it('should correctly parse a valid LLM response', async () => {
|
||||
const llmPayload = {
|
||||
entities: [{ type: 'person', value: 'Claude', importance: 0.9 }],
|
||||
facts: [{ subject: 'Claude', predicate: 'is-a', object: 'Person' }],
|
||||
};
|
||||
const llmResponse = { response: JSON.stringify(llmPayload) };
|
||||
mockHttpRequest(JSON.stringify(llmResponse));
|
||||
|
||||
enhancer.addToBatch({ id: 'm1', text: 'The person is Claude.' });
|
||||
const result = await enhancer.sendBatch();
|
||||
|
||||
assert.ok(result);
|
||||
assert.strictEqual(result.entities.length, 1);
|
||||
assert.strictEqual(result.entities[0].value, 'Claude');
|
||||
assert.strictEqual(result.facts.length, 1);
|
||||
assert.strictEqual(result.facts[0].subject, 'Claude');
|
||||
});
|
||||
|
||||
it('should return null when the batch is empty', async () => {
|
||||
const result = await enhancer.sendBatch();
|
||||
assert.strictEqual(result, null);
|
||||
});
|
||||
|
||||
it('should handle HTTP errors gracefully', async () => {
|
||||
mock.method(
|
||||
enhancer as unknown as Record<string, unknown>,
|
||||
'makeHttpRequest',
|
||||
async () => { throw new Error('HTTP request failed with status 500'); }
|
||||
);
|
||||
|
||||
enhancer.addToBatch({ id: 'm1', text: 'Test' });
|
||||
const result = await enhancer.sendBatch();
|
||||
|
||||
assert.strictEqual(result, null);
|
||||
assert.ok(logger.logs.some(l => l.level === 'error'));
|
||||
});
|
||||
|
||||
it('should handle invalid JSON from LLM', async () => {
|
||||
mockHttpRequest('not json');
|
||||
|
||||
enhancer.addToBatch({ id: 'm1', text: 'Test' });
|
||||
const result = await enhancer.sendBatch();
|
||||
|
||||
assert.strictEqual(result, null);
|
||||
assert.ok(logger.logs.some(l => l.level === 'error'));
|
||||
});
|
||||
|
||||
it('should clear the batch after sending', async () => {
|
||||
const llmPayload = { entities: [], facts: [] };
|
||||
mockHttpRequest(JSON.stringify({ response: JSON.stringify(llmPayload) }));
|
||||
|
||||
enhancer.addToBatch({ id: 'm1', text: 'Test' });
|
||||
await enhancer.sendBatch();
|
||||
|
||||
// Second send should return null (empty batch)
|
||||
const result = await enhancer.sendBatch();
|
||||
assert.strictEqual(result, null);
|
||||
});
|
||||
|
||||
it('should handle LLM response with missing entities/facts gracefully', async () => {
|
||||
mockHttpRequest(JSON.stringify({ response: '{}' }));
|
||||
|
||||
enhancer.addToBatch({ id: 'm1', text: 'Test' });
|
||||
const result = await enhancer.sendBatch();
|
||||
|
||||
assert.ok(result);
|
||||
assert.strictEqual(result.entities.length, 0);
|
||||
assert.strictEqual(result.facts.length, 0);
|
||||
});
|
||||
});
|
||||
117
test/maintenance.test.ts
Normal file
117
test/maintenance.test.ts
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
// test/maintenance.test.ts
|
||||
|
||||
import { describe, it, beforeEach, afterEach } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import { Maintenance } from '../src/maintenance.js';
|
||||
import { Embeddings } from '../src/embeddings.js';
|
||||
import type { KnowledgeConfig, Logger, Fact } from '../src/types.js';
|
||||
import * as timers from 'node:timers/promises';
|
||||
|
||||
class MockFactStore {
|
||||
decayRate = 0;
|
||||
decayedCount = 0;
|
||||
unembeddedFacts: Fact[] = [];
|
||||
markedAsEmbeddedIds: string[] = [];
|
||||
|
||||
decayFacts(rate: number) {
|
||||
this.decayRate = rate;
|
||||
this.decayedCount++;
|
||||
return { decayedCount: 1 };
|
||||
}
|
||||
getUnembeddedFacts() { return this.unembeddedFacts; }
|
||||
markFactsAsEmbedded(ids: string[]) {
|
||||
this.markedAsEmbeddedIds.push(...ids);
|
||||
// Clear unembedded facts after marking (mimics real behavior)
|
||||
this.unembeddedFacts = this.unembeddedFacts.filter(f => !ids.includes(f.id));
|
||||
}
|
||||
}
|
||||
|
||||
class MockEmbeddings {
|
||||
isEnabledState = true;
|
||||
syncedFacts: Fact[] = [];
|
||||
isEnabled() { return this.isEnabledState; }
|
||||
sync(facts: Fact[]) {
|
||||
this.syncedFacts.push(...facts);
|
||||
return Promise.resolve(facts.length);
|
||||
}
|
||||
}
|
||||
|
||||
const createMockLogger = (): Logger => ({
|
||||
info: () => {}, warn: () => {}, error: () => {}, debug: () => {},
|
||||
});
|
||||
|
||||
const mockConfig: KnowledgeConfig = {
|
||||
enabled: true,
|
||||
workspace: '/tmp',
|
||||
decay: { enabled: true, intervalHours: 0.0001, rate: 0.1 },
|
||||
embeddings: {
|
||||
enabled: true,
|
||||
syncIntervalMinutes: 0.0001,
|
||||
endpoint: 'http://test.com',
|
||||
collectionName: 'test',
|
||||
},
|
||||
storage: { maxEntities: 100, maxFacts: 100, writeDebounceMs: 0 },
|
||||
extraction: {
|
||||
regex: { enabled: true },
|
||||
llm: { enabled: false, model: '', endpoint: '', batchSize: 1, cooldownMs: 0 },
|
||||
},
|
||||
};
|
||||
|
||||
describe('Maintenance', () => {
|
||||
let logger: Logger;
|
||||
let mockFactStore: MockFactStore;
|
||||
let mockEmbeddings: MockEmbeddings;
|
||||
let maintenance: Maintenance;
|
||||
|
||||
beforeEach(() => {
|
||||
logger = createMockLogger();
|
||||
mockFactStore = new MockFactStore();
|
||||
mockEmbeddings = new MockEmbeddings();
|
||||
// @ts-ignore - Using mock class
|
||||
maintenance = new Maintenance(mockConfig, logger, mockFactStore, mockEmbeddings);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
maintenance.stop();
|
||||
});
|
||||
|
||||
it('should schedule and run decay task', async () => {
|
||||
maintenance.start();
|
||||
await timers.setTimeout(mockConfig.decay.intervalHours * 60 * 60 * 1000 + 10);
|
||||
assert.strictEqual(mockFactStore.decayedCount > 0, true);
|
||||
assert.strictEqual(mockFactStore.decayRate, mockConfig.decay.rate);
|
||||
});
|
||||
|
||||
it('should schedule and run embeddings sync task', async () => {
|
||||
const testFact: Fact = {
|
||||
id: 'fact1', subject: 's', predicate: 'p', object: 'o',
|
||||
relevance: 1, createdAt: 't', lastAccessed: 't', source: 'ingested',
|
||||
};
|
||||
mockFactStore.unembeddedFacts = [testFact];
|
||||
maintenance.start();
|
||||
await timers.setTimeout(mockConfig.embeddings.syncIntervalMinutes * 60 * 1000 + 10);
|
||||
assert.strictEqual(mockEmbeddings.syncedFacts.length > 0, true);
|
||||
assert.deepStrictEqual(mockEmbeddings.syncedFacts[0], testFact);
|
||||
assert.deepStrictEqual(mockFactStore.markedAsEmbeddedIds, [testFact.id]);
|
||||
});
|
||||
|
||||
it('should not schedule embeddings if disabled', async () => {
|
||||
mockEmbeddings.isEnabledState = false;
|
||||
maintenance.start();
|
||||
await timers.setTimeout(5);
|
||||
assert.strictEqual(mockEmbeddings.syncedFacts.length, 0);
|
||||
});
|
||||
|
||||
it('should stop all timers cleanly', () => {
|
||||
maintenance.start();
|
||||
maintenance.stop();
|
||||
// No error means timers were cleared successfully
|
||||
assert.ok(true);
|
||||
});
|
||||
|
||||
it('should run decay manually', () => {
|
||||
maintenance.runDecay();
|
||||
assert.strictEqual(mockFactStore.decayedCount, 1);
|
||||
assert.strictEqual(mockFactStore.decayRate, mockConfig.decay.rate);
|
||||
});
|
||||
});
|
||||
123
test/patterns.test.ts
Normal file
123
test/patterns.test.ts
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
// test/patterns.test.ts
|
||||
|
||||
import { describe, it } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import { REGEX_PATTERNS } from '../src/patterns.js';
|
||||
|
||||
type TestCase = [string, string | null | string[]];
|
||||
|
||||
const runTestCases = (regex: RegExp, testCases: TestCase[]) => {
|
||||
for (const [input, expected] of testCases) {
|
||||
// Reset regex state for each test case
|
||||
regex.lastIndex = 0;
|
||||
const matches = input.match(regex);
|
||||
if (expected === null) {
|
||||
assert.strictEqual(matches, null, `Expected no match for: "${input}"`);
|
||||
} else if (Array.isArray(expected)) {
|
||||
assert.deepStrictEqual(matches, expected, `Mismatch for: "${input}"`);
|
||||
} else {
|
||||
assert.deepStrictEqual(matches, [expected], `Mismatch for: "${input}"`);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
describe('REGEX_PATTERNS', () => {
|
||||
|
||||
it('should match valid email addresses', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['contact support at support@example.com', 'support@example.com'],
|
||||
['my email is john.doe123@sub.domain.co.uk.', 'john.doe123@sub.domain.co.uk'],
|
||||
['invalid-email@', null],
|
||||
['user@localhost', null],
|
||||
['test@.com', null],
|
||||
['multiple emails: a@b.com and c@d.org', ['a@b.com', 'c@d.org']],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.email, testCases);
|
||||
});
|
||||
|
||||
it('should match valid URLs', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['visit https://www.example.com for more info', 'https://www.example.com'],
|
||||
['check http://sub.domain.org/path?query=1', 'http://sub.domain.org/path?query=1'],
|
||||
['ftp://invalid.com', null],
|
||||
['www.example.com', null],
|
||||
['a link: https://a.co and another http://b.com/end.', ['https://a.co', 'http://b.com/end']],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.url, testCases);
|
||||
});
|
||||
|
||||
it('should match ISO 8601 dates', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['The date is 2026-02-17.', '2026-02-17'],
|
||||
['Timestamp: 2026-02-17T15:30:00Z', '2026-02-17T15:30:00Z'],
|
||||
['With milliseconds: 2026-02-17T15:30:00.123Z', '2026-02-17T15:30:00.123Z'],
|
||||
['Not a date: 2026-02-17T', null],
|
||||
['Invalid format 2026/02/17', null],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.iso_date, testCases);
|
||||
});
|
||||
|
||||
it('should match common date formats (US & EU)', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['US date: 02/17/2026.', '02/17/2026'],
|
||||
['EU date: 17.02.2026,', '17.02.2026'],
|
||||
['Short year: 1.1.99', '1.1.99'],
|
||||
['Two dates: 12/25/2024 and 24.12.2024', ['12/25/2024', '24.12.2024']],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.common_date, testCases);
|
||||
});
|
||||
|
||||
it('should match German date formats', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['Datum: 17. Februar 2026', '17. Februar 2026'],
|
||||
['Am 1. Januar 2025 war es kalt.', '1. Januar 2025'],
|
||||
['No match: 17 Februar 2026', null],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.german_date, testCases);
|
||||
});
|
||||
|
||||
it('should match English date formats', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['Date: February 17, 2026', 'February 17, 2026'],
|
||||
['On March 1st, 2025, we launched.', 'March 1st, 2025'],
|
||||
['Also August 2nd, 2024 and May 3rd, 2023.', ['August 2nd, 2024', 'May 3rd, 2023']],
|
||||
['No match: February 17 2026', null],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.english_date, testCases);
|
||||
});
|
||||
|
||||
it('should match proper nouns (names, places)', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['Hello, my name is Claude Keller.', ['Claude Keller']],
|
||||
['This is Jean-Luc Picard of the USS Enterprise.', ['Jean-Luc Picard', 'USS Enterprise']],
|
||||
['Talk to O\'Malley about it.', ['O\'Malley']],
|
||||
['OpenClaw is a project.', ['OpenClaw']],
|
||||
['Not a name: lower case', null],
|
||||
['Multiple: Forge and Atlas are agents.', ['Forge', 'Atlas']],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.proper_noun, testCases);
|
||||
});
|
||||
|
||||
it('should match product-like names', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['I have an iPhone 15.', 'iPhone 15'],
|
||||
['We are using Windows 11.', 'Windows 11'],
|
||||
['The latest model is GPT-4.', 'GPT-4'],
|
||||
['Also look at ProductX.', 'ProductX'],
|
||||
['The Roman Empire used IV.', 'Roman Empire used IV'], // Imperfect but acceptable
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.product_name, testCases);
|
||||
});
|
||||
|
||||
it('should match organization names with suffixes', () => {
|
||||
const testCases: TestCase[] = [
|
||||
['He works at Vainplex GmbH.', 'Vainplex GmbH'],
|
||||
['The owner of Stark Industries, LLC is Tony Stark.', 'Stark Industries, LLC'],
|
||||
['Globex Corp. is another example.', 'Globex Corp.'],
|
||||
['This also catches Acme Inc. and Cyberdyne Systems Ltd.', ['Acme Inc.', 'Cyberdyne Systems Ltd.']],
|
||||
['No match for Vainplex alone', null],
|
||||
];
|
||||
runTestCases(REGEX_PATTERNS.organization_suffix, testCases);
|
||||
});
|
||||
|
||||
});
|
||||
86
test/storage.test.ts
Normal file
86
test/storage.test.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
// test/storage.test.ts
|
||||
|
||||
import { describe, it, before, after, beforeEach } from 'node:test';
|
||||
import * as assert from 'node:assert';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import { AtomicStorage } from '../src/storage.js';
|
||||
import type { Logger } from '../src/types.js';
|
||||
|
||||
const createMockLogger = (): Logger & { logs: { level: string; msg: string }[] } => {
|
||||
const logs: { level: string; msg: string }[] = [];
|
||||
return { logs, info: (msg) => logs.push({ level: 'info', msg }), warn: (msg) => logs.push({ level: 'warn', msg }), error: (msg) => logs.push({ level: 'error', msg }), debug: (msg) => logs.push({ level: 'debug', msg }) };
|
||||
};
|
||||
|
||||
// Integration tests for AtomicStorage: JSON round-trips against a real temp
// directory, plus the static debounce() helper's coalescing behaviour.
describe('AtomicStorage', () => {
  // Per-run unique directory so repeated test runs cannot collide on disk.
  const testDir = path.join('/tmp', `atomic-storage-test-${Date.now()}`);
  let logger: ReturnType<typeof createMockLogger>;
  let storage: AtomicStorage;

  before(async () => await fs.mkdir(testDir, { recursive: true }));
  after(async () => await fs.rm(testDir, { recursive: true, force: true }));
  // Fresh logger and storage per test so captured logs do not leak between cases.
  beforeEach(() => { logger = createMockLogger(); storage = new AtomicStorage(testDir, logger); });

  it('should initialize and create the storage directory', async () => {
    const newDir = path.join(testDir, 'new-dir');
    const newStorage = new AtomicStorage(newDir, logger);
    await newStorage.init();
    const stats = await fs.stat(newDir);
    assert.ok(stats.isDirectory(), 'Directory should be created');
  });

  describe('writeJson', () => {
    it('should write a JSON object to a file', async () => {
      const fileName = 'test.json';
      const data = { key: 'value', number: 123 };
      await storage.writeJson(fileName, data);
      // Read back through fs directly to verify the on-disk representation.
      const content = await fs.readFile(path.join(testDir, fileName), 'utf-8');
      assert.deepStrictEqual(JSON.parse(content), data);
    });
  });

  describe('readJson', () => {
    it('should read and parse a valid JSON file', async () => {
      const fileName = 'read.json';
      const data = { a: 1, b: [2, 3] };
      await fs.writeFile(path.join(testDir, fileName), JSON.stringify(data));
      const result = await storage.readJson<typeof data>(fileName);
      assert.deepStrictEqual(result, data);
    });

    // Missing files are reported as null rather than thrown errors.
    it('should return null if the file does not exist', async () => {
      const result = await storage.readJson('nonexistent.json');
      assert.strictEqual(result, null);
    });
  });

  describe('debounce', () => {
    it('should only call the async function once after the delay', async () => {
      let callCount = 0;
      const asyncFn = async () => { callCount++; return callCount; };
      const debouncedFn = AtomicStorage.debounce(asyncFn, 50);

      // Three rapid calls inside the 50 ms window must coalesce into a single
      // invocation, and every caller must receive the same resolved value.
      const p1 = debouncedFn();
      const p2 = debouncedFn();
      const p3 = debouncedFn();

      const results = await Promise.all([p1, p2, p3]);
      assert.strictEqual(callCount, 1);
      assert.deepStrictEqual(results, [1, 1, 1]);
    });

    it('should pass the arguments of the last call to the async function', async () => {
      let finalArgs: any[] = [];
      const asyncFn = async (...args: any[]) => { finalArgs = args; return finalArgs; };
      const debouncedFn = AtomicStorage.debounce(asyncFn, 50);

      // Earlier calls are superseded; only the final call's arguments are used.
      debouncedFn(1);
      debouncedFn(2, 3);
      const finalPromise = debouncedFn(4, 5, 6);

      const result = await finalPromise;
      assert.deepStrictEqual(finalArgs, [4, 5, 6]);
      assert.deepStrictEqual(result, [4, 5, 6]);
    });
  });
});
|
||||
26
tsconfig.json
Normal file
26
tsconfig.json
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"lib": ["ES2022"],
|
||||
"moduleResolution": "node",
|
||||
"esModuleInterop": true,
|
||||
"strict": true,
|
||||
"noImplicitAny": true,
|
||||
"strictNullChecks": true,
|
||||
"strictFunctionTypes": true,
|
||||
"strictBindCallApply": true,
|
||||
"strictPropertyInitialization": true,
|
||||
"noImplicitThis": true,
|
||||
"alwaysStrict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"noImplicitReturns": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"skipLibCheck": true,
|
||||
"outDir": "./dist",
|
||||
"declaration": true
|
||||
},
|
||||
"include": ["src/**/*.ts", "index.ts"],
|
||||
"exclude": ["node_modules", "dist", "test"]
|
||||
}
|
||||
Loading…
Reference in a new issue