openclaw-knowledge-engine/test/entity-extractor.test.ts
Claudia 7ee66dd0d2 fix: anonymize all examples and test data
- Replaced real names/companies with generic placeholders
- Tests: Vainplex GmbH → Acme GmbH
- README: Sebastian/Mondo Gate → Alex/Acme Corp
- Author: OpenClaw Community
- License: OpenClaw Contributors
2026-02-17 16:29:44 +01:00

121 lines
5.3 KiB
TypeScript

// test/entity-extractor.test.ts
import { describe, it, beforeEach } from 'node:test';
import * as assert from 'node:assert';
import { EntityExtractor } from '../src/entity-extractor.js';
import type { Entity, Logger } from '../src/types.js';
/**
 * Creates a logger stand-in for tests.
 *
 * Each of the four methods (`info`, `warn`, `error`, `debug`) is a no-op,
 * so code under test can log freely without producing any output.
 *
 * @returns A new object with four do-nothing logging methods.
 */
const createMockLogger = (): Logger => {
  const silentLogger: Logger = {
    info() {},
    warn() {},
    error() {},
    debug() {},
  };
  return silentLogger;
};
/**
 * Test suite for EntityExtractor.
 *
 * Covers the instance method `extract` (text in, entities out) and the
 * static method `mergeEntities` (two entity lists in, one combined list out).
 */
describe('EntityExtractor', () => {
  let extractor: EntityExtractor;
  let logger: Logger;

  // Build a new extractor with a silent logger before every test case.
  beforeEach(() => {
    logger = createMockLogger();
    extractor = new EntityExtractor(logger);
  });

  describe('extract', () => {
    it('should extract a simple email entity', () => {
      const text = 'My email is test@example.com.';
      const entities = extractor.extract(text);

      assert.strictEqual(entities.length, 1);
      const entity = entities[0];
      assert.strictEqual(entity.type, 'email');
      assert.strictEqual(entity.value, 'test@example.com');
      assert.strictEqual(entity.id, 'email:test@example.com');
      assert.deepStrictEqual(entity.mentions, ['test@example.com']);
    });

    it('should extract multiple different entities', () => {
      const text = 'Contact Atlas via atlas@acme.com on 2026-02-17.';
      const entities = extractor.extract(text);

      assert.strictEqual(entities.length, 3); // Atlas (proper_noun), email, date
      // `map` yields a fresh array, so sorting it leaves `entities` untouched.
      const names = entities.map(e => e.value).sort();
      assert.deepStrictEqual(names, ['2026-02-17', 'Atlas', 'atlas@acme.com']);
    });

    it('should handle multiple mentions of the same entity', () => {
      const text = 'Project OpenClaw is great. I love OpenClaw!';
      const entities = extractor.extract(text);

      assert.strictEqual(entities.length, 1);
      const entity = entities[0];
      assert.strictEqual(entity.type, 'unknown'); // From proper_noun
      assert.strictEqual(entity.value, 'OpenClaw');
      assert.strictEqual(entity.count, 2);
      // Two occurrences, but the mention text is recorded only once.
      assert.deepStrictEqual(entity.mentions, ['OpenClaw']);
    });

    it('should correctly identify and canonicalize an organization', () => {
      const text = 'I work for Acme GmbH. It is a German company.';
      const entities = extractor.extract(text);

      const orgEntity = entities.find(e => e.type === 'organization');
      assert.ok(orgEntity, 'Organization entity should be found');
      assert.strictEqual(orgEntity.value, 'Acme'); // Canonicalized
      assert.strictEqual(orgEntity.id, 'organization:acme');
      // The raw mention keeps the legal-form suffix that `value` drops.
      assert.deepStrictEqual(orgEntity.mentions, ['Acme GmbH']);
    });

    it('should extract dates in various formats', () => {
      const text = 'Event dates: 2026-01-01, 02/03/2024, and 4. Mar 2025 is the German date.';
      const entities = extractor.extract(text);

      const dateEntities = entities.filter(e => e.type === 'date');
      assert.strictEqual(dateEntities.length, 3, 'Should find three distinct dates');
      const dateValues = dateEntities.map(e => e.value).sort();
      assert.deepStrictEqual(dateValues, ['02/03/2024', '2026-01-01', '4. Mar 2025']);
    });

    it('should return an empty array for text with no entities', () => {
      const text = 'this is a plain sentence.';
      const entities = extractor.extract(text);

      assert.strictEqual(entities.length, 0);
    });
  });

  describe('mergeEntities', () => {
    it('should merge two disjoint lists of entities', () => {
      const listA: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 1, importance: 0.7, lastSeen: '2026-01-01', mentions: ['Claude'], source: ['regex'] }];
      // Id follows the `<type>:<value>` shape used by the other fixtures and by `extract`.
      const listB: Entity[] = [{ id: 'organization:acme', type: 'organization', value: 'Acme', count: 1, importance: 0.8, lastSeen: '2026-01-01', mentions: ['Acme'], source: ['llm'] }];

      const merged = EntityExtractor.mergeEntities(listA, listB);
      assert.strictEqual(merged.length, 2);
    });

    it('should merge entities with the same ID', () => {
      const date = new Date().toISOString();
      const listA: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 1, importance: 0.7, lastSeen: date, mentions: ['Claude'], source: ['regex'] }];
      const listB: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 2, importance: 0.85, lastSeen: date, mentions: ["claude's", "Claude"], source: ['llm'] }];

      const merged = EntityExtractor.mergeEntities(listA, listB);
      assert.strictEqual(merged.length, 1);

      const entity = merged[0];
      assert.strictEqual(entity.id, 'person:claude');
      assert.strictEqual(entity.count, 3);
      assert.strictEqual(entity.importance, 0.85); // Takes the max importance
      // Sort copies: Array.prototype.sort works in place and would otherwise
      // reorder the arrays held by the merged entity under test.
      assert.deepStrictEqual([...entity.mentions].sort(), ["Claude", "claude's"].sort());
      assert.deepStrictEqual([...entity.source].sort(), ['llm', 'regex'].sort());
    });

    it('should handle an empty list', () => {
      const listA: Entity[] = [{ id: 'person:claude', type: 'person', value: 'Claude', count: 1, importance: 0.7, lastSeen: '2026-01-01', mentions: ['Claude'], source: ['regex'] }];

      // Empty list on the right, on the left, and on both sides.
      const mergedA = EntityExtractor.mergeEntities(listA, []);
      assert.deepStrictEqual(mergedA, listA);

      const mergedB = EntityExtractor.mergeEntities([], listA);
      assert.deepStrictEqual(mergedB, listA);

      const mergedC = EntityExtractor.mergeEntities([], []);
      assert.deepStrictEqual(mergedC, []);
    });
  });
});