Fix str.format KeyError by using %-formatting for ontology JSON

2026-03-13 19:22:36 +07:00 · 2026-03-13 19:22:36 +07:00 · e14080129e
commit e14080129e
parent b4c7f67b00
1 changed file with 17 additions and 32 deletions
--- a/backend/app/services/llm_graph_builder.py
+++ b/backend/app/services/llm_graph_builder.py
@@ -16,37 +16,22 @@ from .text_processor import TextProcessor
 logger = logging.getLogger('mirofish.llm_graph_builder')
-EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
+EXTRACT_SYSTEM_PROMPT_TEMPLATE = (
-
+    "You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, "
-ONTOLOGY SCHEMA:
+    "extract all entities and relationships.\n\n"
-{ontology_json}
+    "ONTOLOGY SCHEMA:\n%s\n\n"
-
+    "RULES:\n"
-RULES:
+    "1. Extract entities that match the entity_types defined in the schema. Each entity needs: "
-1. Extract entities that match the entity_types defined in the schema. Each entity needs: name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.
+    "name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.\n"
-2. Extract relationships between entities that match the edge_types defined in the schema. Each relationship needs: name (the edge type name), source (entity name), target (entity name), and a fact (short description of the relationship).
+    "2. Extract relationships between entities that match the edge_types defined in the schema. "
-3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.
+    "Each relationship needs: name (the edge type name), source (entity name), target (entity name), "
-4. Use consistent entity names across extractions (e.g., always "Mira" not sometimes "Mira" and sometimes "Mira the Socializer").
+    "and a fact (short description of the relationship).\n"
-5. If no entities or relationships are found, return empty arrays.
+    "3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.\n"
-
+    "4. Use consistent entity names across extractions.\n"
-Return JSON in this exact format:
+    "5. If no entities or relationships are found, return empty arrays.\n\n"
-{
+    'Return JSON with keys "entities" (array of objects with name, type, summary, attributes) '
-  "entities": [
+    'and "relationships" (array of objects with name, source, target, fact).'
-    {
+)
-      "name": "EntityName",
-      "type": "EntityTypeName",
-      "summary": "Brief description",
-      "attributes": {"attr_name": "attr_value"}
-    }
-  ],
-  "relationships": [
-    {
-      "name": "EDGE_TYPE_NAME",
-      "source": "SourceEntityName",
-      "target": "TargetEntityName",
-      "fact": "Description of this relationship"
-    }
-  ]
-}"""
 class LLMGraphBuilderService:
@@ -104,7 +89,7 @@ class LLMGraphBuilderService:
                    messages=[
                        {
                            "role": "system",
-                            "content": EXTRACT_SYSTEM_PROMPT.format(ontology_json=ontology_json)
+                            "content": EXTRACT_SYSTEM_PROMPT_TEMPLATE % ontology_json
                        },
                        {
                            "role": "user",