openclaw-knowledge-engine/test/patterns.test.ts
Claudia 8964d93c60 feat: knowledge-engine v0.1.0 — all Cerberus findings fixed
- 83/83 tests passing (was 32/45)
- New: src/http-client.ts (shared HTTP/HTTPS client, fixes C2+H1)
- Fixed: proper_noun regex exclusions (C6)
- Fixed: shutdown hooks registered in hooks.ts (C3)
- Fixed: all timers use .unref() (H6)
- Fixed: resolveConfig split into smaller functions (C4)
- Fixed: extract() split with processMatch helper (C5)
- Fixed: FactStore.addFact isLoaded guard (H3)
- Fixed: validateConfig split (H2)
- Fixed: type-safe config merge, removed as any (H4)
- Added: http-client tests, expanded coverage (H5)
- Fixed: LLM batch await (S1), fresh RegExp per call (S2)
- 1530 LOC source, 1298 LOC tests, strict TypeScript
2026-02-17 16:10:13 +01:00

123 lines
4.7 KiB
TypeScript

// test/patterns.test.ts
import { describe, it } from 'node:test';
import * as assert from 'node:assert';
import { REGEX_PATTERNS } from '../src/patterns.js';
/**
 * One table-driven case: the input text, followed by what the pattern should
 * yield for it — `null` for "no match", a single string for exactly one match,
 * or a list of strings for several matches in order of appearance.
 */
type TestCase = [string, string | null | string[]];

/**
 * Runs every case against `regex` and asserts on the result of
 * `String.prototype.match`.
 *
 * The comparison is a strict deep-equality check against a plain string array,
 * so the helper is written with global (`g`) patterns in mind: those make
 * `match` return just the list of full matches.
 *
 * @param regex - Pattern under test; its `lastIndex` is reset before each case.
 * @param testCases - Input / expectation pairs to verify.
 */
const runTestCases = (regex: RegExp, testCases: TestCase[]) => {
  testCases.forEach(([input, expected]) => {
    // Start every case from a clean slate so a previous search cannot leak state.
    regex.lastIndex = 0;
    const found = input.match(regex);

    if (expected === null) {
      assert.strictEqual(found, null, `Expected no match for: "${input}"`);
      return;
    }

    // A lone string is shorthand for a one-element list of matches.
    const wanted = Array.isArray(expected) ? expected : [expected];
    assert.deepStrictEqual(found, wanted, `Mismatch for: "${input}"`);
  });
};
describe('REGEX_PATTERNS', () => {
  /**
   * A titled group of cases for one pattern. The pattern is supplied through a
   * thunk so that it is only looked up when the individual test actually runs.
   */
  type PatternSuite = {
    title: string;
    pattern: () => RegExp;
    cases: TestCase[];
  };

  const suites: PatternSuite[] = [
    {
      title: 'should match valid email addresses',
      pattern: () => REGEX_PATTERNS.email,
      cases: [
        ['contact support at support@example.com', 'support@example.com'],
        // The sentence-ending period must not be part of the match.
        ['my email is john.doe123@sub.domain.co.uk.', 'john.doe123@sub.domain.co.uk'],
        ['invalid-email@', null],
        ['user@localhost', null],
        ['test@.com', null],
        ['multiple emails: a@b.com and c@d.org', ['a@b.com', 'c@d.org']],
      ],
    },
    {
      title: 'should match valid URLs',
      pattern: () => REGEX_PATTERNS.url,
      cases: [
        ['visit https://www.example.com for more info', 'https://www.example.com'],
        ['check http://sub.domain.org/path?query=1', 'http://sub.domain.org/path?query=1'],
        // Only http(s) URLs with an explicit scheme are expected to match.
        ['ftp://invalid.com', null],
        ['www.example.com', null],
        ['a link: https://a.co and another http://b.com/end.', ['https://a.co', 'http://b.com/end']],
      ],
    },
    {
      title: 'should match ISO 8601 dates',
      pattern: () => REGEX_PATTERNS.iso_date,
      cases: [
        ['The date is 2026-02-17.', '2026-02-17'],
        ['Timestamp: 2026-02-17T15:30:00Z', '2026-02-17T15:30:00Z'],
        ['With milliseconds: 2026-02-17T15:30:00.123Z', '2026-02-17T15:30:00.123Z'],
        ['Not a date: 2026-02-17T', null],
        ['Invalid format 2026/02/17', null],
      ],
    },
    {
      title: 'should match common date formats (US & EU)',
      pattern: () => REGEX_PATTERNS.common_date,
      cases: [
        ['US date: 02/17/2026.', '02/17/2026'],
        ['EU date: 17.02.2026,', '17.02.2026'],
        ['Short year: 1.1.99', '1.1.99'],
        ['Two dates: 12/25/2024 and 24.12.2024', ['12/25/2024', '24.12.2024']],
      ],
    },
    {
      title: 'should match German date formats',
      pattern: () => REGEX_PATTERNS.german_date,
      cases: [
        ['Datum: 17. Februar 2026', '17. Februar 2026'],
        ['Am 1. Januar 2025 war es kalt.', '1. Januar 2025'],
        // Without the period after the day number nothing should match.
        ['No match: 17 Februar 2026', null],
      ],
    },
    {
      title: 'should match English date formats',
      pattern: () => REGEX_PATTERNS.english_date,
      cases: [
        ['Date: February 17, 2026', 'February 17, 2026'],
        ['On March 1st, 2025, we launched.', 'March 1st, 2025'],
        ['Also August 2nd, 2024 and May 3rd, 2023.', ['August 2nd, 2024', 'May 3rd, 2023']],
        // Without the comma before the year nothing should match.
        ['No match: February 17 2026', null],
      ],
    },
    {
      title: 'should match proper nouns (names, places)',
      pattern: () => REGEX_PATTERNS.proper_noun,
      cases: [
        ['Hello, my name is Claude Keller.', ['Claude Keller']],
        ['This is Jean-Luc Picard of the USS Enterprise.', ['Jean-Luc Picard', 'USS Enterprise']],
        ["Talk to O'Malley about it.", ["O'Malley"]],
        ['OpenClaw is a project.', ['OpenClaw']],
        ['Not a name: lower case', null],
        ['Multiple: Forge and Atlas are agents.', ['Forge', 'Atlas']],
      ],
    },
    {
      title: 'should match product-like names',
      pattern: () => REGEX_PATTERNS.product_name,
      cases: [
        ['I have an iPhone 15.', 'iPhone 15'],
        ['We are using Windows 11.', 'Windows 11'],
        ['The latest model is GPT-4.', 'GPT-4'],
        ['Also look at ProductX.', 'ProductX'],
        // Imperfect but acceptable: the over-long match is pinned as current behavior.
        ['The Roman Empire used IV.', 'Roman Empire used IV'],
      ],
    },
    {
      title: 'should match organization names with suffixes',
      pattern: () => REGEX_PATTERNS.organization_suffix,
      cases: [
        ['He works at Vainplex GmbH.', 'Vainplex GmbH'],
        ['The owner of Stark Industries, LLC is Tony Stark.', 'Stark Industries, LLC'],
        ['Globex Corp. is another example.', 'Globex Corp.'],
        ['This also catches Acme Inc. and Cyberdyne Systems Ltd.', ['Acme Inc.', 'Cyberdyne Systems Ltd.']],
        ['No match for Vainplex alone', null],
      ],
    },
  ];

  // Register one test per suite, in declaration order.
  for (const { title, pattern, cases } of suites) {
    it(title, () => {
      runTestCases(pattern(), cases);
    });
  }
});