Add logging to LLM graph builder for debugging extraction failures
This commit is contained in:
parent
79519ddd54
commit
db6e235b98
1 changed file with 22 additions and 2 deletions
|
|
@@ -6,12 +6,15 @@ Replaces Zep with direct LLM calls for entity/relationship extraction
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
from typing import Dict, Any, List, Optional, Callable
|
from typing import Dict, Any, List, Optional, Callable
|
||||||
|
|
||||||
from ..utils.llm_client import LLMClient
|
from ..utils.llm_client import LLMClient
|
||||||
from ..models.task import TaskManager, TaskStatus
|
from ..models.task import TaskManager, TaskStatus
|
||||||
from .text_processor import TextProcessor
|
from .text_processor import TextProcessor
|
||||||
|
|
||||||
|
logger = logging.getLogger('mirofish.llm_graph_builder')
|
||||||
|
|
||||||
|
|
||||||
EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
|
EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
|
||||||
|
|
||||||
|
|
@@ -84,14 +87,19 @@ class LLMGraphBuilderService:
|
||||||
ontology_json = json.dumps(ontology, indent=2, ensure_ascii=False)
|
ontology_json = json.dumps(ontology, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
total = len(chunks)
|
total = len(chunks)
|
||||||
|
success_count = 0
|
||||||
|
fail_count = 0
|
||||||
|
last_error = None
|
||||||
|
|
||||||
for i, chunk in enumerate(chunks):
|
for i, chunk in enumerate(chunks):
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(
|
progress_callback(
|
||||||
f"Extracting from chunk {i+1}/{total}...",
|
f"Extracting from chunk {i+1}/{total} (ok={success_count}, fail={fail_count})...",
|
||||||
(i + 1) / total
|
(i + 1) / total
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
logger.info(f"Extracting chunk {i+1}/{total} ({len(chunk)} chars)")
|
||||||
result = self.llm.chat_json(
|
result = self.llm.chat_json(
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
|
|
@@ -106,10 +114,22 @@ class LLMGraphBuilderService:
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=4096
|
max_tokens=4096
|
||||||
)
|
)
|
||||||
|
entities = result.get("entities", [])
|
||||||
|
rels = result.get("relationships", [])
|
||||||
|
logger.info(f"Chunk {i+1}: extracted {len(entities)} entities, {len(rels)} relationships")
|
||||||
self._merge_extraction(graph_id, result)
|
self._merge_extraction(graph_id, result)
|
||||||
|
success_count += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
fail_count += 1
|
||||||
|
last_error = e
|
||||||
|
logger.error(f"Chunk {i+1} extraction failed: {e}")
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(f"Chunk {i+1} extraction error: {e}", (i + 1) / total)
|
progress_callback(f"Chunk {i+1} error: {e}", (i + 1) / total)
|
||||||
|
|
||||||
|
logger.info(f"Extraction complete: {success_count}/{total} succeeded, {fail_count} failed")
|
||||||
|
|
||||||
|
if success_count == 0 and total > 0:
|
||||||
|
raise RuntimeError(f"All {total} chunks failed extraction. Last error: {last_error}")
|
||||||
|
|
||||||
def _merge_extraction(self, graph_id: str, result: Dict[str, Any]):
|
def _merge_extraction(self, graph_id: str, result: Dict[str, Any]):
|
||||||
"""Merge extracted entities/relationships into the graph, deduplicating by name."""
|
"""Merge extracted entities/relationships into the graph, deduplicating by name."""
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue