Use disk-stored graph data for simulation prepare instead of Zep

This commit is contained in:
_Yusaki 2026-03-13 19:48:25 +07:00
parent 49a86c622a
commit b17828807f
2 changed files with 124 additions and 20 deletions

View file

@ -12,6 +12,7 @@ from ..config import Config
from ..services.zep_entity_reader import ZepEntityReader
from ..services.oasis_profile_generator import OasisProfileGenerator
from ..services.simulation_manager import SimulationManager, SimulationStatus
from ..services.llm_graph_builder import LLMGraphBuilderService
from ..services.simulation_runner import SimulationRunner, RunnerStatus
from ..utils.logger import get_logger
from ..models.project import ProjectManager
@ -471,14 +472,29 @@ def prepare_simulation():
# 这样前端在调用prepare后立即就能获取到预期Agent总数
try:
logger.info(f"同步获取实体数量: graph_id={state.graph_id}")
reader = ZepEntityReader()
# 快速读取实体(不需要边信息,只统计数量)
filtered_preview = reader.filter_defined_entities(
graph_id=state.graph_id,
defined_entity_types=entity_types_list,
enrich_with_edges=False # 不获取边信息,加快速度
)
# 保存实体数量到状态(供前端立即获取)
# Try disk-stored graph data first (LLM-built graphs)
disk_graph_data = None
all_projects = ProjectManager.list_projects()
for proj in all_projects:
if proj.graph_id == state.graph_id:
project_dir = ProjectManager._get_project_dir(proj.project_id)
disk_graph_data = LLMGraphBuilderService.load_graph_data(project_dir)
break
if disk_graph_data:
manager = SimulationManager()
filtered_preview = manager._filter_entities_from_data(
disk_graph_data, entity_types_list
)
else:
reader = ZepEntityReader()
filtered_preview = reader.filter_defined_entities(
graph_id=state.graph_id,
defined_entity_types=entity_types_list,
enrich_with_edges=False
)
state.entities_count = filtered_preview.filtered_count
state.entity_types = list(filtered_preview.entity_types)
logger.info(f"预期实体数量: {filtered_preview.filtered_count}, 类型: {filtered_preview.entity_types}")

View file

@ -14,7 +14,8 @@ from enum import Enum
from ..config import Config
from ..utils.logger import get_logger
from .zep_entity_reader import ZepEntityReader, FilteredEntities
from .zep_entity_reader import ZepEntityReader, FilteredEntities, EntityNode
from .llm_graph_builder import LLMGraphBuilderService
from .oasis_profile_generator import OasisProfileGenerator, OasisAgentProfile
from .simulation_config_generator import SimulationConfigGenerator, SimulationParameters
@ -153,6 +154,77 @@ class SimulationManager:
self._simulations[state.simulation_id] = state
def _filter_entities_from_data(
self,
graph_data: Dict[str, Any],
defined_entity_types: Optional[List[str]] = None
) -> FilteredEntities:
"""Filter entities from disk-stored graph data (no Zep needed)."""
nodes = graph_data.get("nodes", [])
edges = graph_data.get("edges", [])
total_count = len(nodes)
# Build node UUID map for edge enrichment
node_map = {n["uuid"]: n for n in nodes}
filtered_entities = []
entity_types_found = set()
for node in nodes:
labels = node.get("labels", [])
meaningful_labels = [l for l in labels if l not in ("Entity", "Node")]
if not meaningful_labels:
continue
entity_type = meaningful_labels[0]
if defined_entity_types and entity_type not in defined_entity_types:
continue
entity_types_found.add(entity_type)
# Find related edges
related_edges = []
related_nodes = []
node_uuid = node.get("uuid", "")
for edge in edges:
if edge.get("source_node_uuid") == node_uuid or edge.get("target_node_uuid") == node_uuid:
related_edges.append({
"uuid": edge.get("uuid", ""),
"name": edge.get("name", ""),
"fact": edge.get("fact", ""),
"source_node_uuid": edge.get("source_node_uuid", ""),
"target_node_uuid": edge.get("target_node_uuid", ""),
"source_node_name": edge.get("source_node_name", ""),
"target_node_name": edge.get("target_node_name", ""),
})
# Add related node
other_uuid = (edge.get("target_node_uuid") if edge.get("source_node_uuid") == node_uuid
else edge.get("source_node_uuid"))
other_node = node_map.get(other_uuid)
if other_node:
related_nodes.append({
"uuid": other_node.get("uuid", ""),
"name": other_node.get("name", ""),
"labels": other_node.get("labels", []),
})
filtered_entities.append(EntityNode(
uuid=node_uuid,
name=node.get("name", ""),
labels=labels,
summary=node.get("summary", ""),
attributes=node.get("attributes", {}),
related_edges=related_edges,
related_nodes=related_nodes,
))
return FilteredEntities(
entities=filtered_entities,
entity_types=entity_types_found,
total_count=total_count,
filtered_count=len(filtered_entities),
)
def _load_simulation_state(self, simulation_id: str) -> Optional[SimulationState]:
"""从文件加载模拟状态"""
if simulation_id in self._simulations:
@ -270,18 +342,34 @@ class SimulationManager:
# ========== 阶段1: 读取并过滤实体 ==========
if progress_callback:
progress_callback("reading", 0, "Connecting to Zep graph...")
reader = ZepEntityReader()
progress_callback("reading", 0, "Reading graph data...")
# Try loading graph data from disk first (LLM-built graphs)
from ..models.project import ProjectManager
disk_graph_data = None
all_projects = ProjectManager.list_projects()
for proj in all_projects:
if proj.graph_id == state.graph_id:
project_dir = ProjectManager._get_project_dir(proj.project_id)
disk_graph_data = LLMGraphBuilderService.load_graph_data(project_dir)
break
if progress_callback:
progress_callback("reading", 30, "Reading node data...")
filtered = reader.filter_defined_entities(
graph_id=state.graph_id,
defined_entity_types=defined_entity_types,
enrich_with_edges=True
)
progress_callback("reading", 30, "Filtering entities...")
if disk_graph_data:
# Build FilteredEntities from disk data
filtered = self._filter_entities_from_data(
disk_graph_data, defined_entity_types
)
else:
# Fall back to Zep
reader = ZepEntityReader()
filtered = reader.filter_defined_entities(
graph_id=state.graph_id,
defined_entity_types=defined_entity_types,
enrich_with_edges=True
)
state.entities_count = filtered.filtered_count
state.entity_types = list(filtered.entity_types)