Add logging to LLM graph builder for debugging extraction failures
This commit is contained in:
parent
79519ddd54
commit
db6e235b98
1 changed file with 22 additions and 2 deletions
|
|
@@ -6,12 +6,15 @@ Replaces Zep with direct LLM calls for entity/relationship extraction
|
|||
import os
|
||||
import uuid
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Callable
|
||||
|
||||
from ..utils.llm_client import LLMClient
|
||||
from ..models.task import TaskManager, TaskStatus
|
||||
from .text_processor import TextProcessor
|
||||
|
||||
logger = logging.getLogger('mirofish.llm_graph_builder')
|
||||
|
||||
|
||||
EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
|
||||
|
||||
|
|
@@ -84,14 +87,19 @@ class LLMGraphBuilderService:
|
|||
ontology_json = json.dumps(ontology, indent=2, ensure_ascii=False)
|
||||
|
||||
total = len(chunks)
|
||||
success_count = 0
|
||||
fail_count = 0
|
||||
last_error = None
|
||||
|
||||
for i, chunk in enumerate(chunks):
|
||||
if progress_callback:
|
||||
progress_callback(
|
||||
f"Extracting from chunk {i+1}/{total}...",
|
||||
f"Extracting from chunk {i+1}/{total} (ok={success_count}, fail={fail_count})...",
|
||||
(i + 1) / total
|
||||
)
|
||||
|
||||
try:
|
||||
logger.info(f"Extracting chunk {i+1}/{total} ({len(chunk)} chars)")
|
||||
result = self.llm.chat_json(
|
||||
messages=[
|
||||
{
|
||||
|
|
@@ -106,10 +114,22 @@ class LLMGraphBuilderService:
|
|||
temperature=0.1,
|
||||
max_tokens=4096
|
||||
)
|
||||
entities = result.get("entities", [])
|
||||
rels = result.get("relationships", [])
|
||||
logger.info(f"Chunk {i+1}: extracted {len(entities)} entities, {len(rels)} relationships")
|
||||
self._merge_extraction(graph_id, result)
|
||||
success_count += 1
|
||||
except Exception as e:
|
||||
fail_count += 1
|
||||
last_error = e
|
||||
logger.error(f"Chunk {i+1} extraction failed: {e}")
|
||||
if progress_callback:
|
||||
progress_callback(f"Chunk {i+1} extraction error: {e}", (i + 1) / total)
|
||||
progress_callback(f"Chunk {i+1} error: {e}", (i + 1) / total)
|
||||
|
||||
logger.info(f"Extraction complete: {success_count}/{total} succeeded, {fail_count} failed")
|
||||
|
||||
if success_count == 0 and total > 0:
|
||||
raise RuntimeError(f"All {total} chunks failed extraction. Last error: {last_error}")
|
||||
|
||||
def _merge_extraction(self, graph_id: str, result: Dict[str, Any]):
|
||||
"""Merge extracted entities/relationships into the graph, deduplicating by name."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue