- Replaced real names/companies with generic placeholders
- Tests: Vainplex GmbH → Acme GmbH
- README: Sebastian/Mondo Gate → Alex/Acme Corp
- Author: OpenClaw Community
- License: OpenClaw Contributors
121 lines
5.3 KiB
TypeScript
121 lines
5.3 KiB
TypeScript
// test/entity-extractor.test.ts
|
|
|
|
import { describe, it, beforeEach } from 'node:test';
|
|
import * as assert from 'node:assert';
|
|
import { EntityExtractor } from '../src/entity-extractor.js';
|
|
import type { Entity, Logger } from '../src/types.js';
|
|
|
|
/**
 * Builds a silent `Logger` for tests.
 *
 * All four level methods (`info`, `warn`, `error`, `debug`) are no-ops, so
 * code under test can log freely without producing output.
 *
 * @returns A new logger object on every call.
 */
const createMockLogger = (): Logger => {
  const noop = (): void => {};
  return {
    info: noop,
    warn: noop,
    error: noop,
    debug: noop,
  };
};
|
|
|
|
// Test suite for EntityExtractor: the instance method extract() and the
// static helper mergeEntities().
describe('EntityExtractor', () => {
  let extractor: EntityExtractor;
  let logger: Logger;

  // Every test gets a newly constructed extractor with its own mock logger.
  beforeEach(() => {
    logger = createMockLogger();
    extractor = new EntityExtractor(logger);
  });

  describe('extract', () => {
    it('should extract a simple email entity', () => {
      const text = 'My email is test@example.com.';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 1);

      const entity = entities[0];
      assert.ok(entity, 'Email entity should be found');
      assert.strictEqual(entity.type, 'email');
      assert.strictEqual(entity.value, 'test@example.com');
      assert.strictEqual(entity.id, 'email:test@example.com');
      assert.deepStrictEqual(entity.mentions, ['test@example.com']);
    });

    it('should extract multiple different entities', () => {
      const text = 'Contact Atlas via atlas@acme.com on 2026-02-17.';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 3); // Atlas (proper_noun), email, date

      // Sort so the assertion does not depend on extraction order.
      const values = entities.map((e) => e.value).sort();
      assert.deepStrictEqual(values, ['2026-02-17', 'Atlas', 'atlas@acme.com']);
    });

    it('should handle multiple mentions of the same entity', () => {
      const text = 'Project OpenClaw is great. I love OpenClaw!';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 1);

      const entity = entities[0];
      assert.ok(entity, 'OpenClaw entity should be found');
      assert.strictEqual(entity.type, 'unknown'); // From proper_noun
      assert.strictEqual(entity.value, 'OpenClaw');
      // Expected: count tallies both occurrences, mentions lists the surface form once.
      assert.strictEqual(entity.count, 2);
      assert.deepStrictEqual(entity.mentions, ['OpenClaw']);
    });

    it('should correctly identify and canonicalize an organization', () => {
      const text = 'I work for Acme GmbH. It is a German company.';
      const entities = extractor.extract(text);
      const orgEntity = entities.find((e) => e.type === 'organization');

      assert.ok(orgEntity, 'Organization entity should be found');
      assert.strictEqual(orgEntity.value, 'Acme'); // Canonicalized
      assert.strictEqual(orgEntity.id, 'organization:acme');
      assert.deepStrictEqual(orgEntity.mentions, ['Acme GmbH']);
    });

    it('should extract dates in various formats', () => {
      const text = 'Event dates: 2026-01-01, 02/03/2024, and 4. Mar 2025 is the German date.';
      const entities = extractor.extract(text);
      const dateEntities = entities.filter((e) => e.type === 'date');
      assert.strictEqual(dateEntities.length, 3, 'Should find three distinct dates');

      const dateValues = dateEntities.map((e) => e.value).sort();
      assert.deepStrictEqual(dateValues, ['02/03/2024', '2026-01-01', '4. Mar 2025']);
    });

    it('should return an empty array for text with no entities', () => {
      const text = 'this is a plain sentence.';
      const entities = extractor.extract(text);
      assert.strictEqual(entities.length, 0);
    });
  });

  describe('mergeEntities', () => {
    /**
     * Builds a fresh `person:claude` fixture; individual fields can be
     * overridden per test. A new object (and new arrays) is returned on every
     * call so tests never share mutable state.
     */
    const makeEntity = (overrides: Partial<Entity> = {}): Entity => ({
      id: 'person:claude',
      type: 'person',
      value: 'Claude',
      count: 1,
      importance: 0.7,
      lastSeen: '2026-01-01',
      mentions: ['Claude'],
      source: ['regex'],
      ...overrides,
    });

    it('should merge two disjoint lists of entities', () => {
      const listA: Entity[] = [makeEntity()];
      const listB: Entity[] = [
        makeEntity({
          // Same `type:value` id scheme as the ids asserted in the extract tests.
          id: 'organization:acme',
          type: 'organization',
          value: 'Acme',
          importance: 0.8,
          mentions: ['Acme'],
          source: ['llm'],
        }),
      ];

      const merged = EntityExtractor.mergeEntities(listA, listB);
      assert.strictEqual(merged.length, 2);
      // Both inputs must survive the merge, regardless of output order.
      assert.deepStrictEqual(
        merged.map((e) => e.id).sort(),
        ['organization:acme', 'person:claude'],
      );
    });

    it('should merge entities with the same ID', () => {
      const date = new Date().toISOString();
      const listA: Entity[] = [makeEntity({ lastSeen: date })];
      const listB: Entity[] = [
        makeEntity({
          count: 2,
          importance: 0.85,
          lastSeen: date,
          mentions: ["claude's", 'Claude'],
          source: ['llm'],
        }),
      ];

      const merged = EntityExtractor.mergeEntities(listA, listB);
      assert.strictEqual(merged.length, 1);

      const entity = merged[0];
      assert.ok(entity, 'Merged entity should be present');
      assert.strictEqual(entity.id, 'person:claude');
      assert.strictEqual(entity.count, 3);
      assert.strictEqual(entity.importance, 0.85); // Takes the max importance
      // Sort copies: Array.prototype.sort works in place and would otherwise
      // mutate the merged entity being inspected. Expected literals are
      // already in default sort order.
      assert.deepStrictEqual([...entity.mentions].sort(), ['Claude', "claude's"]);
      assert.deepStrictEqual([...entity.source].sort(), ['llm', 'regex']);
    });

    it('should handle an empty list', () => {
      const listA: Entity[] = [makeEntity()];

      const mergedA = EntityExtractor.mergeEntities(listA, []);
      assert.deepStrictEqual(mergedA, listA);

      const mergedB = EntityExtractor.mergeEntities([], listA);
      assert.deepStrictEqual(mergedB, listA);

      const mergedC = EntityExtractor.mergeEntities([], []);
      assert.deepStrictEqual(mergedC, []);
    });
  });
});
|