Fix str.format KeyError by using %-formatting for ontology JSON

This commit is contained in:
_Yusaki 2026-03-13 19:22:36 +07:00
parent b4c7f67b00
commit e14080129e

View file

@ -16,37 +16,22 @@ from .text_processor import TextProcessor
logger = logging.getLogger('mirofish.llm_graph_builder')
# System prompt for the extraction call, rendered with
# ``EXTRACT_SYSTEM_PROMPT.format(ontology_json=...)``.
# ``{ontology_json}`` is the only replacement field. Every literal brace of the
# JSON example is doubled (``{{`` / ``}}``) so ``str.format`` emits it verbatim
# instead of parsing it as a field and raising ``KeyError``.
EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.
ONTOLOGY SCHEMA:
{ontology_json}
RULES:
1. Extract entities that match the entity_types defined in the schema. Each entity needs: name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.
2. Extract relationships between entities that match the edge_types defined in the schema. Each relationship needs: name (the edge type name), source (entity name), target (entity name), and a fact (short description of the relationship).
3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.
4. Use consistent entity names across extractions (e.g., always "Mira" not sometimes "Mira" and sometimes "Mira the Socializer").
5. If no entities or relationships are found, return empty arrays.
Return JSON in this exact format:
{{
"entities": [
{{
"name": "EntityName",
"type": "EntityTypeName",
"summary": "Brief description",
"attributes": {{"attr_name": "attr_value"}}
}}
],
"relationships": [
{{
"name": "EDGE_TYPE_NAME",
"source": "SourceEntityName",
"target": "TargetEntityName",
"fact": "Description of this relationship"
}}
]
}}"""
# System prompt for the extraction call. The single ``%s`` slot receives the
# ontology JSON through the ``%`` operator, so JSON braces need no escaping.
# Any literal percent sign added to this text later must be written as ``%%``.
# Each tuple entry below is one line of the final prompt; empty entries are
# the blank separator lines.
EXTRACT_SYSTEM_PROMPT_TEMPLATE = "\n".join(
    (
        "You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.",
        "",
        "ONTOLOGY SCHEMA:",
        "%s",
        "",
        "RULES:",
        "1. Extract entities that match the entity_types defined in the schema. Each entity needs: name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.",
        "2. Extract relationships between entities that match the edge_types defined in the schema. Each relationship needs: name (the edge type name), source (entity name), target (entity name), and a fact (short description of the relationship).",
        "3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.",
        "4. Use consistent entity names across extractions.",
        "5. If no entities or relationships are found, return empty arrays.",
        "",
        'Return JSON with keys "entities" (array of objects with name, type, summary, attributes) and "relationships" (array of objects with name, source, target, fact).',
    )
)
class LLMGraphBuilderService:
@ -104,7 +89,7 @@ class LLMGraphBuilderService:
messages=[
{
"role": "system",
"content": EXTRACT_SYSTEM_PROMPT.format(ontology_json=ontology_json)
"content": EXTRACT_SYSTEM_PROMPT_TEMPLATE % ontology_json
},
{
"role": "user",