Add logging to LLM graph builder for debugging extraction failures

This commit is contained in:
_Yusaki 2026-03-13 19:14:36 +07:00
parent 79519ddd54
commit db6e235b98

View file

@@ -6,12 +6,15 @@ Replaces Zep with direct LLM calls for entity/relationship extraction
 import os
 import uuid
 import json
+import logging
 from typing import Dict, Any, List, Optional, Callable
 from ..utils.llm_client import LLMClient
 from ..models.task import TaskManager, TaskStatus
 from .text_processor import TextProcessor
+logger = logging.getLogger('mirofish.llm_graph_builder')
 EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
@@ -84,14 +87,19 @@ class LLMGraphBuilderService:
         ontology_json = json.dumps(ontology, indent=2, ensure_ascii=False)
         total = len(chunks)
+        success_count = 0
+        fail_count = 0
+        last_error = None
         for i, chunk in enumerate(chunks):
             if progress_callback:
                 progress_callback(
-                    f"Extracting from chunk {i+1}/{total}...",
+                    f"Extracting from chunk {i+1}/{total} (ok={success_count}, fail={fail_count})...",
                     (i + 1) / total
                 )
             try:
+                logger.info(f"Extracting chunk {i+1}/{total} ({len(chunk)} chars)")
                 result = self.llm.chat_json(
                     messages=[
                         {
@@ -106,10 +114,22 @@ class LLMGraphBuilderService:
                     temperature=0.1,
                     max_tokens=4096
                 )
+                entities = result.get("entities", [])
+                rels = result.get("relationships", [])
+                logger.info(f"Chunk {i+1}: extracted {len(entities)} entities, {len(rels)} relationships")
                 self._merge_extraction(graph_id, result)
+                success_count += 1
             except Exception as e:
+                fail_count += 1
+                last_error = e
+                logger.error(f"Chunk {i+1} extraction failed: {e}")
                 if progress_callback:
-                    progress_callback(f"Chunk {i+1} extraction error: {e}", (i + 1) / total)
+                    progress_callback(f"Chunk {i+1} error: {e}", (i + 1) / total)
+        logger.info(f"Extraction complete: {success_count}/{total} succeeded, {fail_count} failed")
+        if success_count == 0 and total > 0:
+            raise RuntimeError(f"All {total} chunks failed extraction. Last error: {last_error}")
     def _merge_extraction(self, graph_id: str, result: Dict[str, Any]):
         """Merge extracted entities/relationships into the graph, deduplicating by name."""