# NOTE(review): the following lines are CI/web-UI residue accidentally pasted
# into this file; preserved as comments so the module remains importable.
# CI: Some checks failed — Tests / test (push) failing after 2s.
# Change summary: Merged all unique darkplex-core modules into cortex:
#   intelligence/ subfolder (anticipator, collective, shared_memory,
#   knowledge_cleanup, temporal, llm_extractor, loop); governance/ subfolder
#   (policy engine, risk scorer, evidence, enforcer, report generator);
#   entity_manager.py, knowledge_extractor.py. Fixed bare 'from intelligence.'
#   imports to 'from cortex.intelligence.'. Added 'darkplex' CLI alias
#   alongside 'cortex'. Package renamed to darkplex-core v0.2.0.
#   405 tests passing (was 234); 14 new test files covering merged modules.
# File metadata: 147 lines, 5.3 KiB, Python.
"""Tests for intelligence/llm_extractor.py — LLM-Powered Entity Extractor."""

import json
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock

# Make the darkplex-core intelligence modules importable without installation.
sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence"))

from llm_extractor import (
    _parse_json_response,
    _normalize_entities,
    extract_entities_llm,
    extract_entities_llm_batch,
    is_available,
    VALID_TYPES,
)
|
|
|
|
|
|
class TestParseJsonResponse:
    """Behaviour of the internal JSON-response parser."""

    def test_empty(self):
        # Both empty string and None degrade to an empty dict.
        for blank in ("", None):
            assert _parse_json_response(blank) == {}

    def test_plain_json(self):
        payload = '{"albert": {"type": "person", "context": "CEO"}}'
        parsed = _parse_json_response(payload)
        assert "albert" in parsed

    def test_markdown_fenced(self):
        # JSON wrapped in a ```json fence must still be recovered.
        fenced = '```json\n{"albert": {"type": "person", "context": "CEO"}}\n```'
        parsed = _parse_json_response(fenced)
        assert "albert" in parsed

    def test_no_json(self):
        assert _parse_json_response("no json here") == {}

    def test_nested_braces(self):
        parsed = _parse_json_response('{"a": {"type": "person", "context": "test"}}')
        assert "a" in parsed
|
|
|
|
|
|
class TestNormalizeEntities:
    """Normalization of raw LLM output into canonical entity records."""

    def test_valid_entity(self):
        raw = {"Albert": {"type": "person", "context": "CEO of company"}}
        normalized = _normalize_entities(raw)
        # Names are lowercased and tagged as LLM-sourced matches.
        assert "albert" in normalized
        assert normalized["albert"]["type"] == "person"
        assert normalized["albert"]["match"] == "llm"

    def test_type_alias(self):
        # "language" is an alias mapped onto the canonical "technology" type.
        normalized = _normalize_entities(
            {"python": {"type": "language", "context": "programming"}}
        )
        assert normalized["python"]["type"] == "technology"

    def test_unknown_type_becomes_concept(self):
        normalized = _normalize_entities(
            {"thing": {"type": "xyzzy", "context": "unknown"}}
        )
        assert normalized["thing"]["type"] == "concept"

    def test_filters_short_names(self):
        # Single-character names are dropped.
        normalized = _normalize_entities({"x": {"type": "person", "context": "test"}})
        assert len(normalized) == 0

    def test_filters_long_names(self):
        # Names over 80 characters are dropped.
        too_long = "a" * 81
        normalized = _normalize_entities(
            {too_long: {"type": "person", "context": "test"}}
        )
        assert len(normalized) == 0

    def test_non_dict_info_skipped(self):
        # An entity whose info payload is not a dict is silently skipped.
        normalized = _normalize_entities({"test": "not a dict"})
        assert len(normalized) == 0

    def test_context_truncated(self):
        normalized = _normalize_entities(
            {"test": {"type": "person", "context": "x" * 200}}
        )
        # Context strings are capped at 100 characters.
        assert len(normalized["test"]["context"]) <= 100

    def test_underscores_to_hyphens(self):
        normalized = _normalize_entities(
            {"mondo_gate": {"type": "company", "context": "test"}}
        )
        assert "mondo-gate" in normalized
|
|
|
|
|
|
class TestExtractEntitiesLlm:
    """Single-text extraction entry point, with the Ollama call mocked out."""

    @patch("llm_extractor._call_ollama")
    def test_empty_text(self, ollama):
        # Texts below the minimum length never reach the LLM at all.
        assert extract_entities_llm("") == {}
        assert extract_entities_llm("short") == {}
        ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_ollama_unavailable(self, ollama):
        ollama.return_value = None
        outcome = extract_entities_llm("This is a test about Albert and Mondo Gate AG")
        # None (not {}) tells the caller to fall back to another extractor.
        assert outcome is None  # signals fallback

    @patch("llm_extractor._call_ollama")
    def test_successful_extraction(self, ollama):
        ollama.return_value = '{"albert": {"type": "person", "context": "mentioned"}}'
        outcome = extract_entities_llm(
            "Albert discussed the project with the team members today"
        )
        assert "albert" in outcome
        assert outcome["albert"]["type"] == "person"

    @patch("llm_extractor._call_ollama")
    def test_truncates_long_text(self, ollama):
        ollama.return_value = "{}"
        extract_entities_llm("x" * 3000)
        prompt = ollama.call_args[0][0]
        # The text embedded in the prompt should have been truncated.
        assert len(prompt) < 3000 + 500  # prompt overhead
|
|
|
|
|
|
class TestExtractEntitiesLlmBatch:
    """Batch extraction over multiple texts at once."""

    @patch("llm_extractor._call_ollama")
    def test_empty_list(self, ollama):
        assert extract_entities_llm_batch([]) == {}
        ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_filters_short_texts(self, ollama):
        ollama.return_value = "{}"
        # Every candidate is below the minimum length, so nothing is sent.
        outcome = extract_entities_llm_batch(["hi", "yo", ""])
        assert outcome == {}
        ollama.assert_not_called()

    @patch("llm_extractor._call_ollama")
    def test_batch_extraction(self, ollama):
        ollama.return_value = '{"python": {"type": "technology", "context": "language"}}'
        outcome = extract_entities_llm_batch(
            ["Python is a great programming language for data science"]
        )
        assert "python" in outcome
|
|
|
|
|
|
class TestIsAvailable:
    """Reachability probe for the local Ollama HTTP endpoint."""

    @patch("llm_extractor.urllib.request.urlopen")
    def test_available(self, urlopen):
        # Build a fake context manager yielding an HTTP 200 response;
        # __enter__ must return the response itself so .status is visible.
        response = MagicMock()
        response.status = 200
        response.__enter__ = MagicMock(return_value=response)
        response.__exit__ = MagicMock(return_value=False)
        urlopen.return_value = response
        assert is_available() is True

    @patch("llm_extractor.urllib.request.urlopen")
    def test_unavailable(self, urlopen):
        # Any network error must map to a clean False, not an exception.
        urlopen.side_effect = Exception("connection refused")
        assert is_available() is False
|