darkplex-core/tests/test_llm_extractor.py
Claudia fd7d75c0ed
Some checks failed
Tests / test (push) Failing after 2s
Merge darkplex-core into cortex — unified intelligence layer v0.2.0
- Merged all unique darkplex-core modules into cortex:
  - intelligence/ subfolder (anticipator, collective, shared_memory, knowledge_cleanup, temporal, llm_extractor, loop)
  - governance/ subfolder (policy engine, risk scorer, evidence, enforcer, report generator)
  - entity_manager.py, knowledge_extractor.py
- Fixed bare 'from intelligence.' imports to 'from cortex.intelligence.'
- Added 'darkplex' CLI alias alongside 'cortex'
- Package renamed to darkplex-core v0.2.0
- 405 tests passing (was 234)
- 14 new test files covering all merged modules
2026-02-12 08:43:02 +01:00

147 lines
5.3 KiB
Python

"""Tests for intelligence/llm_extractor.py — LLM-Powered Entity Extractor."""
import json
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))
from llm_extractor import (
_parse_json_response,
_normalize_entities,
extract_entities_llm,
extract_entities_llm_batch,
is_available,
VALID_TYPES,
)
class TestParseJsonResponse:
    """Behavior of _parse_json_response across common LLM output shapes."""

    def test_empty(self):
        # Empty string and None both yield {} instead of raising.
        assert _parse_json_response("") == {}
        assert _parse_json_response(None) == {}

    def test_plain_json(self):
        parsed = _parse_json_response('{"albert": {"type": "person", "context": "CEO"}}')
        assert "albert" in parsed

    def test_markdown_fenced(self):
        # LLMs frequently wrap JSON in ```json fences; the parser must strip them.
        parsed = _parse_json_response('```json\n{"albert": {"type": "person", "context": "CEO"}}\n```')
        assert "albert" in parsed

    def test_no_json(self):
        # A response with no JSON object at all degrades to an empty dict.
        assert _parse_json_response("no json here") == {}

    def test_nested_braces(self):
        # Inner object braces must not confuse the extraction of the outer object.
        parsed = _parse_json_response('{"a": {"type": "person", "context": "test"}}')
        assert "a" in parsed
class TestNormalizeEntities:
    """Normalization rules: casing, type aliases, length filters, truncation."""

    def test_valid_entity(self):
        normalized = _normalize_entities({"Albert": {"type": "person", "context": "CEO of company"}})
        # Keys are lowercased and tagged with the "llm" match source.
        assert "albert" in normalized
        assert normalized["albert"]["type"] == "person"
        assert normalized["albert"]["match"] == "llm"

    def test_type_alias(self):
        # "language" is an alias that maps onto the canonical "technology" type.
        normalized = _normalize_entities({"python": {"type": "language", "context": "programming"}})
        assert normalized["python"]["type"] == "technology"

    def test_unknown_type_becomes_concept(self):
        # Unrecognized types fall back to the generic "concept" bucket.
        normalized = _normalize_entities({"thing": {"type": "xyzzy", "context": "unknown"}})
        assert normalized["thing"]["type"] == "concept"

    def test_filters_short_names(self):
        # Single-character names are treated as noise and dropped.
        normalized = _normalize_entities({"x": {"type": "person", "context": "test"}})
        assert len(normalized) == 0

    def test_filters_long_names(self):
        # Names longer than 80 characters are rejected.
        normalized = _normalize_entities({"a" * 81: {"type": "person", "context": "test"}})
        assert len(normalized) == 0

    def test_non_dict_info_skipped(self):
        # Entries whose value is not a dict are skipped silently.
        normalized = _normalize_entities({"test": "not a dict"})
        assert len(normalized) == 0

    def test_context_truncated(self):
        # Context strings are clipped to at most 100 characters.
        normalized = _normalize_entities({"test": {"type": "person", "context": "x" * 200}})
        assert len(normalized["test"]["context"]) <= 100

    def test_underscores_to_hyphens(self):
        # Underscore-separated names are canonicalized with hyphens.
        normalized = _normalize_entities({"mondo_gate": {"type": "company", "context": "test"}})
        assert "mondo-gate" in normalized
class TestExtractEntitiesLlm:
    """extract_entities_llm: input gating, fallback signalling, truncation."""

    @patch("llm_extractor._call_ollama")
    def test_empty_text(self, mock_ollama):
        # Too-short inputs short-circuit before any LLM round-trip happens.
        assert extract_entities_llm("") == {}
        assert extract_entities_llm("short") == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_ollama_unavailable(self, mock_ollama):
        mock_ollama.return_value = None
        # None (rather than {}) tells callers to fall back to another extractor.
        assert extract_entities_llm("This is a test about Albert and Mondo Gate AG") is None

    @patch("llm_extractor._call_ollama")
    def test_successful_extraction(self, mock_ollama):
        mock_ollama.return_value = '{"albert": {"type": "person", "context": "mentioned"}}'
        entities = extract_entities_llm("Albert discussed the project with the team members today")
        assert "albert" in entities
        assert entities["albert"]["type"] == "person"

    @patch("llm_extractor._call_ollama")
    def test_truncates_long_text(self, mock_ollama):
        mock_ollama.return_value = "{}"
        extract_entities_llm("x" * 3000)
        prompt = mock_ollama.call_args[0][0]
        # Prompt = truncated text + template overhead; must stay below input size + slack.
        assert len(prompt) < 3000 + 500
class TestExtractEntitiesLlmBatch:
    """Batch wrapper: empty and too-short inputs never reach the LLM."""

    @patch("llm_extractor._call_ollama")
    def test_empty_list(self, mock_ollama):
        assert extract_entities_llm_batch([]) == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_filters_short_texts(self, mock_ollama):
        mock_ollama.return_value = "{}"
        # Every candidate is below the minimum length, so nothing is sent.
        assert extract_entities_llm_batch(["hi", "yo", ""]) == {}
        mock_ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_batch_extraction(self, mock_ollama):
        mock_ollama.return_value = '{"python": {"type": "technology", "context": "language"}}'
        entities = extract_entities_llm_batch(["Python is a great programming language for data science"])
        assert "python" in entities
class TestIsAvailable:
    """is_available: True on an HTTP 200 probe, False on any connection error."""

    @patch("llm_extractor.urllib.request.urlopen")
    def test_available(self, mock_urlopen):
        # urlopen is used as a context manager; wire __enter__ to yield a 200 response.
        response = MagicMock()
        response.status = 200
        response.__enter__ = MagicMock(return_value=response)
        response.__exit__ = MagicMock(return_value=False)
        mock_urlopen.return_value = response
        assert is_available() is True

    @patch("llm_extractor.urllib.request.urlopen")
    def test_unavailable(self, mock_urlopen):
        # Any exception from urlopen (e.g. refused connection) maps to False.
        mock_urlopen.side_effect = Exception("connection refused")
        assert is_available() is False