Add logging to LLM graph builder for debugging extraction failures

This commit is contained in:
_Yusaki 2026-03-13 19:14:36 +07:00
parent 79519ddd54
commit db6e235b98

View file

@@ -6,12 +6,15 @@ Replaces Zep with direct LLM calls for entity/relationship extraction
import os
import uuid
import json
import logging
from typing import Dict, Any, List, Optional, Callable
from ..utils.llm_client import LLMClient
from ..models.task import TaskManager, TaskStatus
from .text_processor import TextProcessor
logger = logging.getLogger('mirofish.llm_graph_builder')
EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
@@ -84,14 +87,19 @@ class LLMGraphBuilderService:
ontology_json = json.dumps(ontology, indent=2, ensure_ascii=False)
total = len(chunks)
success_count = 0
fail_count = 0
last_error = None
for i, chunk in enumerate(chunks):
if progress_callback:
progress_callback(
f"Extracting from chunk {i+1}/{total}...",
f"Extracting from chunk {i+1}/{total} (ok={success_count}, fail={fail_count})...",
(i + 1) / total
)
try:
logger.info(f"Extracting chunk {i+1}/{total} ({len(chunk)} chars)")
result = self.llm.chat_json(
messages=[
{
@@ -106,10 +114,22 @@ class LLMGraphBuilderService:
temperature=0.1,
max_tokens=4096
)
entities = result.get("entities", [])
rels = result.get("relationships", [])
logger.info(f"Chunk {i+1}: extracted {len(entities)} entities, {len(rels)} relationships")
self._merge_extraction(graph_id, result)
success_count += 1
except Exception as e:
fail_count += 1
last_error = e
logger.error(f"Chunk {i+1} extraction failed: {e}")
if progress_callback:
progress_callback(f"Chunk {i+1} extraction error: {e}", (i + 1) / total)
progress_callback(f"Chunk {i+1} error: {e}", (i + 1) / total)
logger.info(f"Extraction complete: {success_count}/{total} succeeded, {fail_count} failed")
if success_count == 0 and total > 0:
raise RuntimeError(f"All {total} chunks failed extraction. Last error: {last_error}")
def _merge_extraction(self, graph_id: str, result: Dict[str, Any]):
"""Merge extracted entities/relationships into the graph, deduplicating by name."""