"""Tests for intelligence/knowledge_cleanup.py — Knowledge Graph Cleanup.""" import json import math import sys from datetime import datetime, timedelta from pathlib import Path from unittest.mock import patch, MagicMock sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence")) import cortex.intelligence.knowledge_cleanup as kc class TestBackup: def test_creates_backup(self, tmp_path): src = tmp_path / "test.json" src.write_text('{"a": 1}') backup_path = kc.backup(src) assert backup_path.exists() assert "backup_" in backup_path.name class TestAtomicWrite: def test_writes_atomically(self, tmp_path): path = tmp_path / "out.json" kc.atomic_write(path, {"key": "value"}) assert json.loads(path.read_text()) == {"key": "value"} class TestFindDuplicates: def test_no_duplicates(self): entities = {"albert": {}, "mondo-gate": {}} groups = kc.find_duplicates(entities) assert len(groups) == 0 def test_case_duplicates(self): entities = {"Albert": {}, "albert": {}, "ALBERT": {}} groups = kc.find_duplicates(entities) assert len(groups) >= 1 def test_substring_duplicates(self): entities = {"mondo": {"type": "company"}, "mondo gate": {"type": "company"}} groups = kc.find_duplicates(entities) assert len(groups) >= 1 class TestPickCanonical: def test_prefers_uppercase(self): names = ["albert", "Albert"] entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}} assert kc.pick_canonical(names, entities) == "Albert" def test_prefers_more_fields(self): names = ["a", "A"] entities = {"a": {"type": "person"}, "A": {"type": "person", "source": "x", "extra": "y"}} assert kc.pick_canonical(names, entities) == "A" class TestDeduplicate: def test_merges_entities(self): entities = {"albert": {"type": "person"}, "Albert": {"type": "person", "source": "manual"}} rels = {} e, r = kc.deduplicate(entities, rels, dry_run=False) assert len(e) == 1 def test_dry_run_no_change(self): entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}} rels = {} e, r = kc.deduplicate(entities, rels, dry_run=True) assert len(e) == 2 # unchanged in dry run def test_updates_relationships(self): entities = {"albert": {"type": "person"}, "Albert": {"type": "person"}} rels = { "albert::mondo": {"a": "albert", "b": "mondo", "types": ["co-occurrence"], "count": 1, "first_seen": "2026-01-01", "last_seen": "2026-01-01"}, } e, r = kc.deduplicate(entities, rels, dry_run=False) # Relationship should be remapped to canonical assert len(r) == 1 class TestScoreRelationships: def test_scores_assigned(self): rels = { "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat(), "first_seen": "2026-01-01"}, } result = kc.score_relationships(rels, dry_run=False) assert "strength" in result["a::b"] assert 0 < result["a::b"]["strength"] <= 1 def test_removes_weak(self): old_date = (datetime.now() - timedelta(days=300)).isoformat() rels = { "a::b": {"count": 1, "types": ["co-occurrence"], "last_seen": old_date, "first_seen": old_date}, } result = kc.score_relationships(rels, dry_run=False) # Very old + low count should have low strength if len(result) > 0: assert result["a::b"]["strength"] < 0.3 def test_dry_run(self): rels = { "a::b": {"count": 10, "types": ["co-occurrence"], "last_seen": datetime.now().isoformat()}, } result = kc.score_relationships(rels, dry_run=True) assert "strength" not in result["a::b"] class TestClassifyUnknowns: @patch("cortex.intelligence.knowledge_cleanup.ollama_generate") def test_no_unknowns(self, mock_ollama): entities = {"albert": {"type": "person"}} result = kc.classify_unknowns(entities, dry_run=False) mock_ollama.assert_not_called() assert result == entities @patch("cortex.intelligence.knowledge_cleanup.ollama_generate") def test_classifies_unknowns(self, mock_ollama): mock_ollama.return_value = '{"1": "person"}' entities = {"albert": {"type": "unknown"}} result = kc.classify_unknowns(entities, dry_run=False) assert result["albert"]["type"] == "person" @patch("cortex.intelligence.knowledge_cleanup.ollama_generate") def test_dry_run_no_change(self, mock_ollama): mock_ollama.return_value = '{"1": "person"}' entities = {"albert": {"type": "unknown"}} result = kc.classify_unknowns(entities, dry_run=True) assert result["albert"]["type"] == "unknown" @patch("cortex.intelligence.knowledge_cleanup.ollama_generate") def test_handles_llm_failure(self, mock_ollama): mock_ollama.side_effect = Exception("timeout") entities = {"albert": {"type": "unknown"}} result = kc.classify_unknowns(entities, dry_run=False) assert result["albert"]["type"] == "unknown" # unchanged