diff --git a/backend/app/services/llm_graph_builder.py b/backend/app/services/llm_graph_builder.py index e87ed6c..bbb2c69 100644 --- a/backend/app/services/llm_graph_builder.py +++ b/backend/app/services/llm_graph_builder.py @@ -6,12 +6,15 @@ Replaces Zep with direct LLM calls for entity/relationship extraction import os import uuid import json +import logging from typing import Dict, Any, List, Optional, Callable from ..utils.llm_client import LLMClient from ..models.task import TaskManager, TaskStatus from .text_processor import TextProcessor +logger = logging.getLogger('mirofish.llm_graph_builder') + EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships. @@ -84,14 +87,19 @@ class LLMGraphBuilderService: ontology_json = json.dumps(ontology, indent=2, ensure_ascii=False) total = len(chunks) + success_count = 0 + fail_count = 0 + last_error = None + for i, chunk in enumerate(chunks): if progress_callback: progress_callback( - f"Extracting from chunk {i+1}/{total}...", + f"Extracting from chunk {i+1}/{total} (ok={success_count}, fail={fail_count})...", (i + 1) / total ) try: + logger.info(f"Extracting chunk {i+1}/{total} ({len(chunk)} chars)") result = self.llm.chat_json( messages=[ { @@ -106,10 +114,22 @@ class LLMGraphBuilderService: temperature=0.1, max_tokens=4096 ) + entities = result.get("entities", []) + rels = result.get("relationships", []) + logger.info(f"Chunk {i+1}: extracted {len(entities)} entities, {len(rels)} relationships") self._merge_extraction(graph_id, result) + success_count += 1 except Exception as e: + fail_count += 1 + last_error = e + logger.error(f"Chunk {i+1} extraction failed: {e}") if progress_callback: - progress_callback(f"Chunk {i+1} extraction error: {e}", (i + 1) / total) + progress_callback(f"Chunk {i+1} error: {e}", (i + 1) / total) + + logger.info(f"Extraction complete: {success_count}/{total} succeeded, {fail_count} failed") + + if success_count == 0 and total > 0: + raise RuntimeError(f"All {total} chunks failed extraction. Last error: {last_error}") def _merge_extraction(self, graph_id: str, result: Dict[str, Any]): """Merge extracted entities/relationships into the graph, deduplicating by name."""