# darkplex-core/tests/test_knowledge_cleanup.py

"""Tests for intelligence/knowledge_cleanup.py — Knowledge Graph Cleanup."""

import json
import math
import sys
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock

# Make the directory containing `knowledge_cleanup` importable as a top-level
# module location.
#
# The directory derived from this file's own location takes precedence so the
# suite does not depend on where the repository is checked out (assumes this
# file lives in <repo>/tests/ next to <repo>/intelligence/). The original
# ~/repos/darkplex-core location is still added, after it, as a fallback.
_HOME_CHECKOUT_DIR = Path.home() / "repos" / "darkplex-core" / "intelligence"
_REPO_RELATIVE_DIR = Path(__file__).resolve().parent.parent / "intelligence"
sys.path.insert(0, str(_HOME_CHECKOUT_DIR))
sys.path.insert(0, str(_REPO_RELATIVE_DIR))

import knowledge_cleanup as kc


class TestBackup:
    """Checks for ``kc.backup``."""

    def test_creates_backup(self, tmp_path):
        """The returned path exists and its file name contains ``backup_``."""
        original = tmp_path.joinpath("test.json")
        original.write_text('{"a": 1}')

        copy_path = kc.backup(original)

        assert copy_path.exists()
        assert "backup_" in copy_path.name

class TestAtomicWrite:
    """Checks for ``kc.atomic_write``."""

    def test_writes_atomically(self, tmp_path):
        """Data handed to ``atomic_write`` can be read back as the same JSON."""
        target = tmp_path.joinpath("out.json")

        kc.atomic_write(target, {"key": "value"})

        written = json.loads(target.read_text())
        assert written == {"key": "value"}


class TestFindDuplicates:
    """Checks for ``kc.find_duplicates``."""

    def test_no_duplicates(self):
        """Two unrelated names yield no duplicate groups."""
        distinct = {"albert": {}, "mondo-gate": {}}
        found = kc.find_duplicates(distinct)
        assert len(found) == 0

    def test_case_duplicates(self):
        """Names differing only by letter case yield at least one group."""
        case_variants = {name: {} for name in ("Albert", "albert", "ALBERT")}
        found = kc.find_duplicates(case_variants)
        assert 1 <= len(found)

    def test_substring_duplicates(self):
        """A name contained in another same-typed name yields at least one group."""
        overlapping = {
            name: {"type": "company"} for name in ("mondo", "mondo gate")
        }
        found = kc.find_duplicates(overlapping)
        assert 1 <= len(found)


class TestPickCanonical:
    """Checks for ``kc.pick_canonical``."""

    def test_prefers_uppercase(self):
        """Between ``albert`` and ``Albert`` the capitalised name is chosen."""
        candidates = {
            "albert": {"type": "person"},
            "Albert": {"type": "person", "source": "manual"},
        }
        chosen = kc.pick_canonical(["albert", "Albert"], candidates)
        assert chosen == "Albert"

    def test_prefers_more_fields(self):
        """Between ``a`` and ``A`` the entry carrying more fields (``A``) is chosen."""
        candidates = {
            "a": {"type": "person"},
            "A": {"type": "person", "source": "x", "extra": "y"},
        }
        chosen = kc.pick_canonical(["a", "A"], candidates)
        assert chosen == "A"


class TestDeduplicate:
    """Checks for ``kc.deduplicate``."""

    def test_merges_entities(self):
        """Two case variants collapse into a single entity on a real run."""
        people = {
            "albert": {"type": "person"},
            "Albert": {"type": "person", "source": "manual"},
        }
        merged_entities, _merged_rels = kc.deduplicate(people, {}, dry_run=False)
        assert len(merged_entities) == 1

    def test_dry_run_no_change(self):
        """A dry run leaves both entities in place."""
        people = {
            "albert": {"type": "person"},
            "Albert": {"type": "person"},
        }
        kept_entities, _kept_rels = kc.deduplicate(people, {}, dry_run=True)
        # Dry run must not merge anything.
        assert len(kept_entities) == 2

    def test_updates_relationships(self):
        """One relationship remains after the entities it refers to are merged."""
        people = {
            "albert": {"type": "person"},
            "Albert": {"type": "person"},
        }
        links = {
            "albert::mondo": {
                "a": "albert",
                "b": "mondo",
                "types": ["co-occurrence"],
                "count": 1,
                "first_seen": "2026-01-01",
                "last_seen": "2026-01-01",
            },
        }
        _merged_entities, merged_rels = kc.deduplicate(people, links, dry_run=False)
        # The relationship is expected to be remapped onto the canonical
        # entity; only the resulting count is verified here.
        assert len(merged_rels) == 1


class TestScoreRelationships:
    """Checks for ``kc.score_relationships``."""

    def test_scores_assigned(self):
        """A real run adds a ``strength`` in the range (0, 1] to the relationship."""
        links = {
            "a::b": {
                "count": 10,
                "types": ["co-occurrence"],
                "last_seen": datetime.now().isoformat(),
                "first_seen": "2026-01-01",
            },
        }
        scored = kc.score_relationships(links, dry_run=False)
        assert "strength" in scored["a::b"]
        assert 0 < scored["a::b"]["strength"] <= 1

    def test_removes_weak(self):
        """A 300-day-old, single-count relationship is dropped or scores below 0.3."""
        stale = (datetime.now() - timedelta(days=300)).isoformat()
        links = {
            "a::b": {
                "count": 1,
                "types": ["co-occurrence"],
                "last_seen": stale,
                "first_seen": stale,
            },
        }
        scored = kc.score_relationships(links, dry_run=False)
        if len(scored) == 0:
            # Nothing survived scoring, so there is no strength to inspect.
            return
        # Very old + low count should have low strength
        assert scored["a::b"]["strength"] < 0.3

    def test_dry_run(self):
        """A dry run does not attach a ``strength`` to the relationship."""
        links = {
            "a::b": {
                "count": 10,
                "types": ["co-occurrence"],
                "last_seen": datetime.now().isoformat(),
            },
        }
        untouched = kc.score_relationships(links, dry_run=True)
        assert "strength" not in untouched["a::b"]


class TestClassifyUnknowns:
    """Checks for ``kc.classify_unknowns`` with ``ollama_generate`` patched out."""

    @patch("knowledge_cleanup.ollama_generate")
    def test_no_unknowns(self, mock_ollama):
        """With no ``unknown`` entity the patched generator is never called."""
        known_only = {"albert": {"type": "person"}}
        outcome = kc.classify_unknowns(known_only, dry_run=False)
        mock_ollama.assert_not_called()
        assert outcome == known_only

    @patch("knowledge_cleanup.ollama_generate")
    def test_classifies_unknowns(self, mock_ollama):
        """The type from the generator's reply replaces ``unknown``."""
        mock_ollama.return_value = '{"1": "person"}'
        pending = {"albert": {"type": "unknown"}}
        outcome = kc.classify_unknowns(pending, dry_run=False)
        assert outcome["albert"]["type"] == "person"

    @patch("knowledge_cleanup.ollama_generate")
    def test_dry_run_no_change(self, mock_ollama):
        """A dry run keeps the entity's type as ``unknown``."""
        mock_ollama.return_value = '{"1": "person"}'
        pending = {"albert": {"type": "unknown"}}
        outcome = kc.classify_unknowns(pending, dry_run=True)
        assert outcome["albert"]["type"] == "unknown"

    @patch("knowledge_cleanup.ollama_generate")
    def test_handles_llm_failure(self, mock_ollama):
        """An exception raised by the generator leaves the type as ``unknown``."""
        mock_ollama.side_effect = Exception("timeout")
        pending = {"albert": {"type": "unknown"}}
        outcome = kc.classify_unknowns(pending, dry_run=False)
        # Entity type stays as it was when classification fails.
        assert outcome["albert"]["type"] == "unknown"