Add logging to LLM graph builder for debugging extraction failures

This commit is contained in:
_Yusaki 2026-03-13 19:14:36 +07:00
parent 79519ddd54
commit db6e235b98

View file

@@ -6,12 +6,15 @@ Replaces Zep with direct LLM calls for entity/relationship extraction
 import os
 import uuid
 import json
+import logging
 from typing import Dict, Any, List, Optional, Callable
 from ..utils.llm_client import LLMClient
 from ..models.task import TaskManager, TaskStatus
 from .text_processor import TextProcessor
+logger = logging.getLogger('mirofish.llm_graph_builder')
 EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
@@ -84,14 +87,19 @@ class LLMGraphBuilderService:
         ontology_json = json.dumps(ontology, indent=2, ensure_ascii=False)
         total = len(chunks)
+        success_count = 0
+        fail_count = 0
+        last_error = None
         for i, chunk in enumerate(chunks):
             if progress_callback:
                 progress_callback(
-                    f"Extracting from chunk {i+1}/{total}...",
+                    f"Extracting from chunk {i+1}/{total} (ok={success_count}, fail={fail_count})...",
                     (i + 1) / total
                 )
             try:
+                logger.info(f"Extracting chunk {i+1}/{total} ({len(chunk)} chars)")
                 result = self.llm.chat_json(
                     messages=[
                         {
@@ -106,10 +114,22 @@ class LLMGraphBuilderService:
                     temperature=0.1,
                     max_tokens=4096
                 )
+                entities = result.get("entities", [])
+                rels = result.get("relationships", [])
+                logger.info(f"Chunk {i+1}: extracted {len(entities)} entities, {len(rels)} relationships")
                 self._merge_extraction(graph_id, result)
+                success_count += 1
             except Exception as e:
+                fail_count += 1
+                last_error = e
+                logger.error(f"Chunk {i+1} extraction failed: {e}")
                 if progress_callback:
-                    progress_callback(f"Chunk {i+1} extraction error: {e}", (i + 1) / total)
+                    progress_callback(f"Chunk {i+1} error: {e}", (i + 1) / total)
+        logger.info(f"Extraction complete: {success_count}/{total} succeeded, {fail_count} failed")
+        if success_count == 0 and total > 0:
+            raise RuntimeError(f"All {total} chunks failed extraction. Last error: {last_error}")
     def _merge_extraction(self, graph_id: str, result: Dict[str, Any]):
         """Merge extracted entities/relationships into the graph, deduplicating by name."""