Fix str.format KeyError by using %-formatting for ontology JSON

2026-03-13 19:22:36 +07:00 · 2026-03-13 19:22:36 +07:00 · e14080129e
commit e14080129e
parent b4c7f67b00
1 changed file with 17 additions and 32 deletions
--- a/backend/app/services/llm_graph_builder.py
+++ b/backend/app/services/llm_graph_builder.py
@@ -16,37 +16,22 @@ from .text_processor import TextProcessor
 logger = logging.getLogger('mirofish.llm_graph_builder')


-EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
-
-ONTOLOGY SCHEMA:
-{ontology_json}
-
-RULES:
-1. Extract entities that match the entity_types defined in the schema. Each entity needs: name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.
-2. Extract relationships between entities that match the edge_types defined in the schema. Each relationship needs: name (the edge type name), source (entity name), target (entity name), and a fact (short description of the relationship).
-3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.
-4. Use consistent entity names across extractions (e.g., always "Mira" not sometimes "Mira" and sometimes "Mira the Socializer").
-5. If no entities or relationships are found, return empty arrays.
-
-Return JSON in this exact format:
-{
-  "entities": [
-    {
-      "name": "EntityName",
-      "type": "EntityTypeName",
-      "summary": "Brief description",
-      "attributes": {"attr_name": "attr_value"}
-    }
-  ],
-  "relationships": [
-    {
-      "name": "EDGE_TYPE_NAME",
-      "source": "SourceEntityName",
-      "target": "TargetEntityName",
-      "fact": "Description of this relationship"
-    }
-  ]
-}"""
+EXTRACT_SYSTEM_PROMPT_TEMPLATE = (
+    "You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, "
+    "extract all entities and relationships.\n\n"
+    "ONTOLOGY SCHEMA:\n%s\n\n"
+    "RULES:\n"
+    "1. Extract entities that match the entity_types defined in the schema. Each entity needs: "
+    "name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.\n"
+    "2. Extract relationships between entities that match the edge_types defined in the schema. "
+    "Each relationship needs: name (the edge type name), source (entity name), target (entity name), "
+    "and a fact (short description of the relationship).\n"
+    "3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.\n"
+    "4. Use consistent entity names across extractions.\n"
+    "5. If no entities or relationships are found, return empty arrays.\n\n"
+    'Return JSON with keys "entities" (array of objects with name, type, summary, attributes) '
+    'and "relationships" (array of objects with name, source, target, fact).'
+)


 class LLMGraphBuilderService:
@@ -104,7 +89,7 @@ class LLMGraphBuilderService:
                    messages=[
                        {
                            "role": "system",
-                            "content": EXTRACT_SYSTEM_PROMPT.format(ontology_json=ontology_json)
+                            "content": EXTRACT_SYSTEM_PROMPT_TEMPLATE % ontology_json
                        },
                        {
                            "role": "user",