"""Tests for intelligence/llm_extractor.py — LLM-Powered Entity Extractor.""" import json import sys from pathlib import Path from unittest.mock import patch, MagicMock sys.path.insert(0, str(Path.home() / "repos" / "darkplex-core" / "intelligence")) from cortex.intelligence.llm_extractor import ( _parse_json_response, _normalize_entities, extract_entities_llm, extract_entities_llm_batch, is_available, VALID_TYPES, ) class TestParseJsonResponse: def test_empty(self): assert _parse_json_response("") == {} assert _parse_json_response(None) == {} def test_plain_json(self): r = _parse_json_response('{"albert": {"type": "person", "context": "CEO"}}') assert "albert" in r def test_markdown_fenced(self): r = _parse_json_response('```json\n{"albert": {"type": "person", "context": "CEO"}}\n```') assert "albert" in r def test_no_json(self): assert _parse_json_response("no json here") == {} def test_nested_braces(self): r = _parse_json_response('{"a": {"type": "person", "context": "test"}}') assert "a" in r class TestNormalizeEntities: def test_valid_entity(self): raw = {"Albert": {"type": "person", "context": "CEO of company"}} result = _normalize_entities(raw) assert "albert" in result assert result["albert"]["type"] == "person" assert result["albert"]["match"] == "llm" def test_type_alias(self): raw = {"python": {"type": "language", "context": "programming"}} result = _normalize_entities(raw) assert result["python"]["type"] == "technology" def test_unknown_type_becomes_concept(self): raw = {"thing": {"type": "xyzzy", "context": "unknown"}} result = _normalize_entities(raw) assert result["thing"]["type"] == "concept" def test_filters_short_names(self): raw = {"x": {"type": "person", "context": "test"}} result = _normalize_entities(raw) assert len(result) == 0 def test_filters_long_names(self): raw = {"a" * 81: {"type": "person", "context": "test"}} result = _normalize_entities(raw) assert len(result) == 0 def test_non_dict_info_skipped(self): raw = {"test": "not a dict"} result = _normalize_entities(raw) assert len(result) == 0 def test_context_truncated(self): raw = {"test": {"type": "person", "context": "x" * 200}} result = _normalize_entities(raw) assert len(result["test"]["context"]) <= 100 def test_underscores_to_hyphens(self): raw = {"mondo_gate": {"type": "company", "context": "test"}} result = _normalize_entities(raw) assert "mondo-gate" in result class TestExtractEntitiesLlm: @patch("cortex.intelligence.llm_extractor._call_ollama") def test_empty_text(self, mock_ollama): assert extract_entities_llm("") == {} assert extract_entities_llm("short") == {} mock_ollama.assert_not_called() @patch("cortex.intelligence.llm_extractor._call_ollama") def test_ollama_unavailable(self, mock_ollama): mock_ollama.return_value = None result = extract_entities_llm("This is a test about Albert and Mondo Gate AG") assert result is None # signals fallback @patch("cortex.intelligence.llm_extractor._call_ollama") def test_successful_extraction(self, mock_ollama): mock_ollama.return_value = '{"albert": {"type": "person", "context": "mentioned"}}' result = extract_entities_llm("Albert discussed the project with the team members today") assert "albert" in result assert result["albert"]["type"] == "person" @patch("cortex.intelligence.llm_extractor._call_ollama") def test_truncates_long_text(self, mock_ollama): mock_ollama.return_value = "{}" extract_entities_llm("x" * 3000) call_args = mock_ollama.call_args[0][0] # The text in the prompt should be truncated assert len(call_args) < 3000 + 500 # prompt overhead class TestExtractEntitiesLlmBatch: @patch("cortex.intelligence.llm_extractor._call_ollama") def test_empty_list(self, mock_ollama): assert extract_entities_llm_batch([]) == {} mock_ollama.assert_not_called() @patch("cortex.intelligence.llm_extractor._call_ollama") def test_filters_short_texts(self, mock_ollama): mock_ollama.return_value = "{}" result = extract_entities_llm_batch(["hi", "yo", ""]) assert result == {} mock_ollama.assert_not_called() @patch("cortex.intelligence.llm_extractor._call_ollama") def test_batch_extraction(self, mock_ollama): mock_ollama.return_value = '{"python": {"type": "technology", "context": "language"}}' result = extract_entities_llm_batch(["Python is a great programming language for data science"]) assert "python" in result class TestIsAvailable: @patch("cortex.intelligence.llm_extractor.urllib.request.urlopen") def test_available(self, mock_urlopen): mock_resp = MagicMock() mock_resp.status = 200 mock_resp.__enter__ = MagicMock(return_value=mock_resp) mock_resp.__exit__ = MagicMock(return_value=False) mock_urlopen.return_value = mock_resp assert is_available() is True @patch("cortex.intelligence.llm_extractor.urllib.request.urlopen") def test_unavailable(self, mock_urlopen): mock_urlopen.side_effect = Exception("connection refused") assert is_available() is False