From b17828807f379ea7d7a0667deba35298c8e29a01 Mon Sep 17 00:00:00 2001 From: _Yusaki Date: Fri, 13 Mar 2026 19:48:25 +0700 Subject: [PATCH] Use disk-stored graph data for simulation prepare instead of Zep --- backend/app/api/simulation.py | 32 ++++-- backend/app/services/simulation_manager.py | 112 ++++++++++++++++++--- 2 files changed, 124 insertions(+), 20 deletions(-) diff --git a/backend/app/api/simulation.py b/backend/app/api/simulation.py index 568e7f0..83221b3 100644 --- a/backend/app/api/simulation.py +++ b/backend/app/api/simulation.py @@ -12,6 +12,7 @@ from ..config import Config from ..services.zep_entity_reader import ZepEntityReader from ..services.oasis_profile_generator import OasisProfileGenerator from ..services.simulation_manager import SimulationManager, SimulationStatus +from ..services.llm_graph_builder import LLMGraphBuilderService from ..services.simulation_runner import SimulationRunner, RunnerStatus from ..utils.logger import get_logger from ..models.project import ProjectManager @@ -471,14 +472,29 @@ def prepare_simulation(): # 这样前端在调用prepare后立即就能获取到预期Agent总数 try: logger.info(f"同步获取实体数量: graph_id={state.graph_id}") - reader = ZepEntityReader() - # 快速读取实体(不需要边信息,只统计数量) - filtered_preview = reader.filter_defined_entities( - graph_id=state.graph_id, - defined_entity_types=entity_types_list, - enrich_with_edges=False # 不获取边信息,加快速度 - ) - # 保存实体数量到状态(供前端立即获取) + + # Try disk-stored graph data first (LLM-built graphs) + disk_graph_data = None + all_projects = ProjectManager.list_projects() + for proj in all_projects: + if proj.graph_id == state.graph_id: + project_dir = ProjectManager._get_project_dir(proj.project_id) + disk_graph_data = LLMGraphBuilderService.load_graph_data(project_dir) + break + + if disk_graph_data: + manager = SimulationManager() + filtered_preview = manager._filter_entities_from_data( + disk_graph_data, entity_types_list + ) + else: + reader = ZepEntityReader() + filtered_preview = reader.filter_defined_entities( + graph_id=state.graph_id, + defined_entity_types=entity_types_list, + enrich_with_edges=False + ) + state.entities_count = filtered_preview.filtered_count state.entity_types = list(filtered_preview.entity_types) logger.info(f"预期实体数量: {filtered_preview.filtered_count}, 类型: {filtered_preview.entity_types}") diff --git a/backend/app/services/simulation_manager.py b/backend/app/services/simulation_manager.py index 4aa2169..eaf3206 100644 --- a/backend/app/services/simulation_manager.py +++ b/backend/app/services/simulation_manager.py @@ -14,7 +14,8 @@ from enum import Enum from ..config import Config from ..utils.logger import get_logger -from .zep_entity_reader import ZepEntityReader, FilteredEntities +from .zep_entity_reader import ZepEntityReader, FilteredEntities, EntityNode +from .llm_graph_builder import LLMGraphBuilderService from .oasis_profile_generator import OasisProfileGenerator, OasisAgentProfile from .simulation_config_generator import SimulationConfigGenerator, SimulationParameters @@ -153,6 +154,77 @@ class SimulationManager: self._simulations[state.simulation_id] = state + def _filter_entities_from_data( + self, + graph_data: Dict[str, Any], + defined_entity_types: Optional[List[str]] = None + ) -> FilteredEntities: + """Filter entities from disk-stored graph data (no Zep needed).""" + nodes = graph_data.get("nodes", []) + edges = graph_data.get("edges", []) + total_count = len(nodes) + + # Build node UUID map for edge enrichment + node_map = {n["uuid"]: n for n in nodes} + + filtered_entities = [] + entity_types_found = set() + + for node in nodes: + labels = node.get("labels", []) + meaningful_labels = [l for l in labels if l not in ("Entity", "Node")] + if not meaningful_labels: + continue + + entity_type = meaningful_labels[0] + if defined_entity_types and entity_type not in defined_entity_types: + continue + + entity_types_found.add(entity_type) + + # Find related edges + related_edges = [] + related_nodes = [] + node_uuid = node.get("uuid", "") + for edge in edges: + if edge.get("source_node_uuid") == node_uuid or edge.get("target_node_uuid") == node_uuid: + related_edges.append({ + "uuid": edge.get("uuid", ""), + "name": edge.get("name", ""), + "fact": edge.get("fact", ""), + "source_node_uuid": edge.get("source_node_uuid", ""), + "target_node_uuid": edge.get("target_node_uuid", ""), + "source_node_name": edge.get("source_node_name", ""), + "target_node_name": edge.get("target_node_name", ""), + }) + # Add related node + other_uuid = (edge.get("target_node_uuid") if edge.get("source_node_uuid") == node_uuid + else edge.get("source_node_uuid")) + other_node = node_map.get(other_uuid) + if other_node: + related_nodes.append({ + "uuid": other_node.get("uuid", ""), + "name": other_node.get("name", ""), + "labels": other_node.get("labels", []), + }) + + filtered_entities.append(EntityNode( + uuid=node_uuid, + name=node.get("name", ""), + labels=labels, + summary=node.get("summary", ""), + attributes=node.get("attributes", {}), + related_edges=related_edges, + related_nodes=related_nodes, + )) + + return FilteredEntities( + entities=filtered_entities, + entity_types=entity_types_found, + total_count=total_count, + filtered_count=len(filtered_entities), + ) + def _load_simulation_state(self, simulation_id: str) -> Optional[SimulationState]: """从文件加载模拟状态""" if simulation_id in self._simulations: @@ -270,18 +342,34 @@ class SimulationManager: # ========== 阶段1: 读取并过滤实体 ========== if progress_callback: - progress_callback("reading", 0, "Connecting to Zep graph...") - - reader = ZepEntityReader() - + progress_callback("reading", 0, "Reading graph data...") + + # Try loading graph data from disk first (LLM-built graphs) + from ..models.project import ProjectManager + disk_graph_data = None + all_projects = ProjectManager.list_projects() + for proj in all_projects: + if proj.graph_id == state.graph_id: + project_dir = ProjectManager._get_project_dir(proj.project_id) + disk_graph_data = LLMGraphBuilderService.load_graph_data(project_dir) + break + if progress_callback: - progress_callback("reading", 30, "Reading node data...") - - filtered = reader.filter_defined_entities( - graph_id=state.graph_id, - defined_entity_types=defined_entity_types, - enrich_with_edges=True - ) + progress_callback("reading", 30, "Filtering entities...") + + if disk_graph_data: + # Build FilteredEntities from disk data + filtered = self._filter_entities_from_data( + disk_graph_data, defined_entity_types + ) + else: + # Fall back to Zep + reader = ZepEntityReader() + filtered = reader.filter_defined_entities( + graph_id=state.graph_id, + defined_entity_types=defined_entity_types, + enrich_with_edges=True + ) state.entities_count = filtered.filtered_count state.entity_types = list(filtered.entity_types)