Fix str.format KeyError by using %-formatting for ontology JSON
This commit is contained in:
parent
b4c7f67b00
commit
e14080129e
1 changed file with 17 additions and 32 deletions
|
|
@@ -16,37 +16,22 @@ from .text_processor import TextProcessor
|
|||
logger = logging.getLogger('mirofish.llm_graph_builder')
|
||||
|
||||
|
||||
# System prompt for the extraction request, rendered with
# ``EXTRACT_SYSTEM_PROMPT.format(ontology_json=...)``.
#
# ``{ontology_json}`` is the only replacement field. Every brace that belongs
# to the example JSON below is doubled (``{{`` / ``}}``) so that str.format
# emits it as a literal brace instead of parsing it as a field name and
# raising KeyError.
EXTRACT_SYSTEM_PROMPT = """You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, extract all entities and relationships.

ONTOLOGY SCHEMA:
{ontology_json}

RULES:
1. Extract entities that match the entity_types defined in the schema. Each entity needs: name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.
2. Extract relationships between entities that match the edge_types defined in the schema. Each relationship needs: name (the edge type name), source (entity name), target (entity name), and a fact (short description of the relationship).
3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.
4. Use consistent entity names across extractions (e.g., always "Mira" not sometimes "Mira" and sometimes "Mira the Socializer").
5. If no entities or relationships are found, return empty arrays.

Return JSON in this exact format:
{{
"entities": [
{{
"name": "EntityName",
"type": "EntityTypeName",
"summary": "Brief description",
"attributes": {{"attr_name": "attr_value"}}
}}
],
"relationships": [
{{
"name": "EDGE_TYPE_NAME",
"source": "SourceEntityName",
"target": "TargetEntityName",
"fact": "Description of this relationship"
}}
]
}}"""
|
||||
# System prompt for the extraction request, rendered with
# ``EXTRACT_SYSTEM_PROMPT_TEMPLATE % ontology_json``.
#
# printf-style formatting is used so that braces inside the ontology JSON (or
# in this text) are never interpreted as replacement fields. The template has
# exactly one ``%s`` slot; any literal percent sign added to this text later
# must be written as ``%%``.
EXTRACT_SYSTEM_PROMPT_TEMPLATE = (
    "You are a knowledge graph extraction engine. Given a text chunk and an ontology schema, "
    "extract all entities and relationships.\n\n"
    "ONTOLOGY SCHEMA:\n%s\n\n"
    "RULES:\n"
    "1. Extract entities that match the entity_types defined in the schema. Each entity needs: "
    "name, type (matching an entity_type name), summary (1-2 sentences), and any attributes defined for that type.\n"
    "2. Extract relationships between entities that match the edge_types defined in the schema. "
    "Each relationship needs: name (the edge type name), source (entity name), target (entity name), "
    "and a fact (short description of the relationship).\n"
    "3. Only extract entities and relationships that are explicitly mentioned or strongly implied in the text.\n"
    "4. Use consistent entity names across extractions.\n"
    "5. If no entities or relationships are found, return empty arrays.\n\n"
    'Return JSON with keys "entities" (array of objects with name, type, summary, attributes) '
    'and "relationships" (array of objects with name, source, target, fact).'
)
|
||||
|
||||
|
||||
class LLMGraphBuilderService:
|
||||
|
|
@@ -104,7 +89,7 @@ class LLMGraphBuilderService:
|
|||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": EXTRACT_SYSTEM_PROMPT.format(ontology_json=ontology_json)
|
||||
"content": EXTRACT_SYSTEM_PROMPT_TEMPLATE % ontology_json
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
|
|
|
|||
Loading…
Reference in a new issue