MiroFish/backend/app/services/report_agent.py
666ghj 5ece3f670b Implement Report Agent for automated report generation and interaction
- Introduced the Report Agent module to facilitate the automatic generation of simulation analysis reports using LangChain and Zep, following the ReACT model.
- Added functionality for report outline planning, segmented content generation, and user interaction through a dialogue interface.
- Implemented new API endpoints for report generation, status checking, and retrieval, enhancing the overall reporting capabilities.
- Updated README.md to include detailed instructions on the new report generation features and API usage.
- Enhanced the project structure to accommodate the new report management functionalities, including report storage and retrieval mechanisms.
2025-12-09 15:10:55 +08:00

1296 lines
46 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Report Agent服务
使用LangChain + Zep实现ReACT模式的模拟报告生成
功能:
1. 根据模拟需求和Zep图谱信息生成报告
2. 先规划目录结构,然后分段生成
3. 每段采用ReACT多轮思考与反思模式
4. 支持与用户对话,在对话中自主调用检索工具
"""
import os
import json
import time
import re
from typing import Dict, Any, List, Optional, Callable
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from ..config import Config
from ..utils.llm_client import LLMClient
from ..utils.logger import get_logger
from .zep_tools import ZepToolsService, SearchResult
logger = get_logger('mirofish.report_agent')
class ReportStatus(str, Enum):
"""报告状态"""
PENDING = "pending"
PLANNING = "planning"
GENERATING = "generating"
COMPLETED = "completed"
FAILED = "failed"
@dataclass
class ReportSection:
"""报告章节"""
title: str
content: str = ""
subsections: List['ReportSection'] = field(default_factory=list)
def to_dict(self) -> Dict[str, Any]:
return {
"title": self.title,
"content": self.content,
"subsections": [s.to_dict() for s in self.subsections]
}
def to_markdown(self, level: int = 2) -> str:
"""转换为Markdown格式"""
md = f"{'#' * level} {self.title}\n\n"
if self.content:
md += f"{self.content}\n\n"
for sub in self.subsections:
md += sub.to_markdown(level + 1)
return md
@dataclass
class ReportOutline:
"""报告大纲"""
title: str
summary: str
sections: List[ReportSection]
def to_dict(self) -> Dict[str, Any]:
return {
"title": self.title,
"summary": self.summary,
"sections": [s.to_dict() for s in self.sections]
}
def to_markdown(self) -> str:
"""转换为Markdown格式"""
md = f"# {self.title}\n\n"
md += f"> {self.summary}\n\n"
for section in self.sections:
md += section.to_markdown()
return md
@dataclass
class Report:
"""完整报告"""
report_id: str
simulation_id: str
graph_id: str
simulation_requirement: str
status: ReportStatus
outline: Optional[ReportOutline] = None
markdown_content: str = ""
created_at: str = ""
completed_at: str = ""
error: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
return {
"report_id": self.report_id,
"simulation_id": self.simulation_id,
"graph_id": self.graph_id,
"simulation_requirement": self.simulation_requirement,
"status": self.status.value,
"outline": self.outline.to_dict() if self.outline else None,
"markdown_content": self.markdown_content,
"created_at": self.created_at,
"completed_at": self.completed_at,
"error": self.error
}
class ReportAgent:
"""
Report Agent - 模拟报告生成Agent
采用ReACTReasoning + Acting模式
1. 规划阶段:分析模拟需求,规划报告目录结构
2. 生成阶段:逐章节生成内容,每章节可多次调用工具获取信息
3. 反思阶段:检查内容完整性和准确性
工具MCP封装
- search_graph: 图谱语义搜索
- get_graph_statistics: 获取图谱统计
- get_entity_summary: 获取实体摘要
- get_simulation_context: 获取模拟上下文
"""
# 最大工具调用次数(每个章节)
MAX_TOOL_CALLS_PER_SECTION = 5
# 最大反思轮数
MAX_REFLECTION_ROUNDS = 2
def __init__(
self,
graph_id: str,
simulation_id: str,
simulation_requirement: str,
llm_client: Optional[LLMClient] = None,
zep_tools: Optional[ZepToolsService] = None
):
"""
初始化Report Agent
Args:
graph_id: 图谱ID
simulation_id: 模拟ID
simulation_requirement: 模拟需求描述
llm_client: LLM客户端可选
zep_tools: Zep工具服务可选
"""
self.graph_id = graph_id
self.simulation_id = simulation_id
self.simulation_requirement = simulation_requirement
self.llm = llm_client or LLMClient()
self.zep_tools = zep_tools or ZepToolsService()
# 工具定义
self.tools = self._define_tools()
logger.info(f"ReportAgent 初始化完成: graph_id={graph_id}, simulation_id={simulation_id}")
def _define_tools(self) -> Dict[str, Dict[str, Any]]:
"""定义可用工具"""
return {
"search_graph": {
"name": "search_graph",
"description": "在知识图谱中搜索相关信息。输入搜索查询,返回与查询相关的事实和关系。",
"parameters": {
"query": "搜索查询字符串",
"limit": "返回结果数量可选默认10"
}
},
"get_graph_statistics": {
"name": "get_graph_statistics",
"description": "获取知识图谱的统计信息,包括节点数量、边数量、实体类型分布等。",
"parameters": {}
},
"get_entity_summary": {
"name": "get_entity_summary",
"description": "获取指定实体的详细信息和关系摘要。",
"parameters": {
"entity_name": "实体名称"
}
},
"get_simulation_context": {
"name": "get_simulation_context",
"description": "获取与模拟需求相关的上下文信息,包括相关事实、实体列表等。",
"parameters": {
"query": "额外的查询条件(可选)"
}
},
"get_entities_by_type": {
"name": "get_entities_by_type",
"description": "按类型获取实体列表如获取所有Student类型或PublicFigure类型的实体。",
"parameters": {
"entity_type": "实体类型名称"
}
}
}
def _execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> str:
"""
执行工具调用
Args:
tool_name: 工具名称
parameters: 工具参数
Returns:
工具执行结果(文本格式)
"""
logger.info(f"执行工具: {tool_name}, 参数: {parameters}")
try:
if tool_name == "search_graph":
query = parameters.get("query", "")
limit = parameters.get("limit", 10)
result = self.zep_tools.search_graph(
graph_id=self.graph_id,
query=query,
limit=limit
)
return result.to_text()
elif tool_name == "get_graph_statistics":
result = self.zep_tools.get_graph_statistics(self.graph_id)
return json.dumps(result, ensure_ascii=False, indent=2)
elif tool_name == "get_entity_summary":
entity_name = parameters.get("entity_name", "")
result = self.zep_tools.get_entity_summary(
graph_id=self.graph_id,
entity_name=entity_name
)
return json.dumps(result, ensure_ascii=False, indent=2)
elif tool_name == "get_simulation_context":
query = parameters.get("query", self.simulation_requirement)
result = self.zep_tools.get_simulation_context(
graph_id=self.graph_id,
simulation_requirement=query
)
return json.dumps(result, ensure_ascii=False, indent=2)
elif tool_name == "get_entities_by_type":
entity_type = parameters.get("entity_type", "")
nodes = self.zep_tools.get_entities_by_type(
graph_id=self.graph_id,
entity_type=entity_type
)
result = [n.to_dict() for n in nodes]
return json.dumps(result, ensure_ascii=False, indent=2)
else:
return f"未知工具: {tool_name}"
except Exception as e:
logger.error(f"工具执行失败: {tool_name}, 错误: {str(e)}")
return f"工具执行失败: {str(e)}"
def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]:
"""
从LLM响应中解析工具调用
支持的格式:
<tool_call>
{"name": "tool_name", "parameters": {"param1": "value1"}}
</tool_call>
或者:
[TOOL_CALL] tool_name(param1="value1", param2="value2")
"""
tool_calls = []
# 格式1: XML风格
xml_pattern = r'<tool_call>\s*(\{.*?\})\s*</tool_call>'
for match in re.finditer(xml_pattern, response, re.DOTALL):
try:
call_data = json.loads(match.group(1))
tool_calls.append(call_data)
except json.JSONDecodeError:
pass
# 格式2: 函数调用风格
func_pattern = r'\[TOOL_CALL\]\s*(\w+)\s*\((.*?)\)'
for match in re.finditer(func_pattern, response, re.DOTALL):
tool_name = match.group(1)
params_str = match.group(2)
# 解析参数
params = {}
for param_match in re.finditer(r'(\w+)\s*=\s*["\']([^"\']*)["\']', params_str):
params[param_match.group(1)] = param_match.group(2)
tool_calls.append({
"name": tool_name,
"parameters": params
})
return tool_calls
def _get_tools_description(self) -> str:
"""生成工具描述文本"""
desc_parts = ["可用工具:"]
for name, tool in self.tools.items():
params_desc = ", ".join([f"{k}: {v}" for k, v in tool["parameters"].items()])
desc_parts.append(f"- {name}: {tool['description']}")
if params_desc:
desc_parts.append(f" 参数: {params_desc}")
return "\n".join(desc_parts)
def plan_outline(
self,
progress_callback: Optional[Callable] = None
) -> ReportOutline:
"""
规划报告大纲
使用LLM分析模拟需求规划报告的目录结构
Args:
progress_callback: 进度回调函数
Returns:
ReportOutline: 报告大纲
"""
logger.info("开始规划报告大纲...")
if progress_callback:
progress_callback("planning", 0, "正在分析模拟需求...")
# 首先获取模拟上下文
context = self.zep_tools.get_simulation_context(
graph_id=self.graph_id,
simulation_requirement=self.simulation_requirement
)
if progress_callback:
progress_callback("planning", 30, "正在生成报告大纲...")
# 构建规划prompt
system_prompt = """你是一个专业的舆情分析报告撰写专家。你需要根据用户的模拟需求和已有的知识图谱信息,规划一份精炼的模拟分析报告大纲。
【重要】报告章节数量限制:
- 报告最多包含5个主章节
- 每个章节可以有0-2个子章节
- 内容要精炼,避免冗余
报告应聚焦以下核心内容选择最相关的3-5项
1. 执行摘要 - 简要总结模拟结果和关键发现
2. 模拟背景 - 描述模拟的初始条件和场景设定
3. 关键发现 - 分析模拟中的重要发现和趋势
4. 舆情分析 - 分析舆论走向、情绪变化、关键意见领袖等
5. 建议与展望 - 基于分析结果提出建议
请输出JSON格式的报告大纲格式如下
{
"title": "报告标题",
"summary": "报告摘要(一句话概括)",
"sections": [
{
"title": "章节标题",
"description": "章节内容描述",
"subsections": [
{"title": "子章节标题", "description": "子章节描述"}
]
}
]
}
注意sections数组最多包含5个元素"""
user_prompt = f"""模拟需求:
{self.simulation_requirement}
已有的知识图谱信息:
- 总节点数: {context.get('graph_statistics', {}).get('total_nodes', 0)}
- 总边数: {context.get('graph_statistics', {}).get('total_edges', 0)}
- 实体类型: {list(context.get('graph_statistics', {}).get('entity_types', {}).keys())}
- 实体数量: {context.get('total_entities', 0)}
相关事实:
{json.dumps(context.get('related_facts', [])[:10], ensure_ascii=False, indent=2)}
请根据以上信息,生成一份针对此模拟场景的报告大纲。
【再次提醒】报告必须控制在最多5个章节以内内容要精炼聚焦。"""
try:
response = self.llm.chat_json(
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.3
)
if progress_callback:
progress_callback("planning", 80, "正在解析大纲结构...")
# 解析大纲
sections = []
for section_data in response.get("sections", []):
subsections = []
for sub_data in section_data.get("subsections", []):
subsections.append(ReportSection(
title=sub_data.get("title", ""),
content=""
))
sections.append(ReportSection(
title=section_data.get("title", ""),
content="",
subsections=subsections
))
outline = ReportOutline(
title=response.get("title", "模拟分析报告"),
summary=response.get("summary", ""),
sections=sections
)
if progress_callback:
progress_callback("planning", 100, "大纲规划完成")
logger.info(f"大纲规划完成: {len(sections)} 个章节")
return outline
except Exception as e:
logger.error(f"大纲规划失败: {str(e)}")
# 返回默认大纲5个章节
return ReportOutline(
title="模拟分析报告",
summary="基于模拟结果的分析报告",
sections=[
ReportSection(title="执行摘要"),
ReportSection(title="模拟背景与场景设定"),
ReportSection(title="关键发现与趋势分析"),
ReportSection(title="舆情走向与情绪演化"),
ReportSection(title="总结与建议")
]
)
def _generate_section_react(
self,
section: ReportSection,
outline: ReportOutline,
previous_sections: List[str],
progress_callback: Optional[Callable] = None
) -> str:
"""
使用ReACT模式生成单个章节内容
ReACT循环
1. Thought思考- 分析需要什么信息
2. Action行动- 调用工具获取信息
3. Observation观察- 分析工具返回结果
4. 重复直到信息足够或达到最大次数
5. Final Answer最终回答- 生成章节内容
Args:
section: 要生成的章节
outline: 完整大纲
previous_sections: 之前章节的内容(用于保持连贯性)
progress_callback: 进度回调
Returns:
章节内容Markdown格式
"""
logger.info(f"ReACT生成章节: {section.title}")
# 构建系统prompt
system_prompt = f"""你是一个专业的舆情分析报告撰写专家,正在撰写报告的一个章节。
报告标题: {outline.title}
报告摘要: {outline.summary}
模拟需求: {self.simulation_requirement}
当前要撰写的章节: {section.title}
你可以使用以下工具来获取信息,每次最多调用{self.MAX_TOOL_CALLS_PER_SECTION}次:
{self._get_tools_description()}
请按照以下ReACT格式进行思考和行动
Thought: [分析当前需要什么信息来撰写这个章节]
Action: [如果需要信息,调用工具]
<tool_call>
{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}}
</tool_call>
当收集到足够信息后,输出:
Final Answer:
[章节的完整Markdown内容]
注意:
1. 内容要专业、客观、有深度
2. 引用具体的数据和事实
3. 保持与其他章节的逻辑连贯性
4. 使用适当的Markdown格式列表、强调等
5. 不要重复前面章节已经详细描述的内容"""
# 构建用户prompt
previous_content = "\n\n".join(previous_sections) if previous_sections else "(这是第一个章节)"
user_prompt = f"""已完成的章节内容:
{previous_content[:2000]}
现在请撰写章节: {section.title}
首先思考需要什么信息,然后调用工具获取,最后生成内容。"""
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
# ReACT循环
tool_calls_count = 0
max_iterations = self.MAX_TOOL_CALLS_PER_SECTION + 2
for iteration in range(max_iterations):
if progress_callback:
progress_callback(
"generating",
int((iteration / max_iterations) * 100),
f"思考与行动中 ({tool_calls_count}/{self.MAX_TOOL_CALLS_PER_SECTION})"
)
# 调用LLM
response = self.llm.chat(
messages=messages,
temperature=0.5,
max_tokens=4096
)
logger.debug(f"LLM响应: {response[:200]}...")
# 检查是否有最终答案
if "Final Answer:" in response:
# 提取最终答案
final_answer = response.split("Final Answer:")[-1].strip()
logger.info(f"章节 {section.title} 生成完成")
return final_answer
# 解析工具调用
tool_calls = self._parse_tool_calls(response)
if not tool_calls:
# 没有工具调用也没有最终答案,提示生成最终答案
messages.append({"role": "assistant", "content": response})
messages.append({
"role": "user",
"content": "请基于已有信息,输出 Final Answer: 并生成章节内容。"
})
continue
# 执行工具调用
tool_results = []
for call in tool_calls:
if tool_calls_count >= self.MAX_TOOL_CALLS_PER_SECTION:
break
result = self._execute_tool(call["name"], call.get("parameters", {}))
tool_results.append(f"工具 {call['name']} 返回:\n{result}")
tool_calls_count += 1
# 将结果添加到消息
messages.append({"role": "assistant", "content": response})
messages.append({
"role": "user",
"content": f"Observation:\n" + "\n\n".join(tool_results) + "\n\n请继续思考或输出 Final Answer:"
})
# 达到最大迭代次数,强制生成内容
logger.warning(f"章节 {section.title} 达到最大迭代次数,强制生成")
messages.append({
"role": "user",
"content": "已达到工具调用限制,请直接输出 Final Answer: 并生成章节内容。"
})
response = self.llm.chat(
messages=messages,
temperature=0.5,
max_tokens=4096
)
if "Final Answer:" in response:
return response.split("Final Answer:")[-1].strip()
return response
def generate_report(
self,
progress_callback: Optional[Callable[[str, int, str], None]] = None
) -> Report:
"""
生成完整报告(分章节实时输出)
每个章节生成完成后立即保存到文件夹,不需要等待整个报告完成。
文件结构:
reports/{report_id}/
meta.json - 报告元信息
outline.json - 报告大纲
progress.json - 生成进度
section_01.md - 第1章节
section_02.md - 第2章节
...
full_report.md - 完整报告
Args:
progress_callback: 进度回调函数 (stage, progress, message)
Returns:
Report: 完整报告
"""
import uuid
report_id = f"report_{uuid.uuid4().hex[:12]}"
report = Report(
report_id=report_id,
simulation_id=self.simulation_id,
graph_id=self.graph_id,
simulation_requirement=self.simulation_requirement,
status=ReportStatus.PENDING,
created_at=datetime.now().isoformat()
)
# 已完成的章节标题列表(用于进度追踪)
completed_section_titles = []
try:
# 初始化:创建报告文件夹并保存初始状态
ReportManager._ensure_report_folder(report_id)
ReportManager.update_progress(
report_id, "pending", 0, "初始化报告...",
completed_sections=[]
)
ReportManager.save_report(report)
# 阶段1: 规划大纲
report.status = ReportStatus.PLANNING
ReportManager.update_progress(
report_id, "planning", 5, "开始规划报告大纲...",
completed_sections=[]
)
if progress_callback:
progress_callback("planning", 0, "开始规划报告大纲...")
outline = self.plan_outline(
progress_callback=lambda stage, prog, msg:
progress_callback(stage, prog // 5, msg) if progress_callback else None
)
report.outline = outline
# 保存大纲到文件
ReportManager.save_outline(report_id, outline)
ReportManager.update_progress(
report_id, "planning", 15, f"大纲规划完成,共{len(outline.sections)}个章节",
completed_sections=[]
)
ReportManager.save_report(report)
logger.info(f"大纲已保存到文件: {report_id}/outline.json")
# 阶段2: 逐章节生成(分章节保存)
report.status = ReportStatus.GENERATING
total_sections = len(outline.sections)
generated_sections = [] # 保存内容用于上下文
for i, section in enumerate(outline.sections):
section_num = i + 1
base_progress = 20 + int((i / total_sections) * 70)
# 更新进度
ReportManager.update_progress(
report_id, "generating", base_progress,
f"正在生成章节: {section.title} ({section_num}/{total_sections})",
current_section=section.title,
completed_sections=completed_section_titles
)
if progress_callback:
progress_callback(
"generating",
base_progress,
f"正在生成章节: {section.title} ({section_num}/{total_sections})"
)
# 生成章节内容
section_content = self._generate_section_react(
section=section,
outline=outline,
previous_sections=generated_sections,
progress_callback=lambda stage, prog, msg:
progress_callback(
stage,
base_progress + int(prog * 0.7 / total_sections),
msg
) if progress_callback else None
)
section.content = section_content
generated_sections.append(f"## {section.title}\n\n{section_content}")
# 【关键】立即保存章节到文件
ReportManager.save_section(report_id, section_num, section)
completed_section_titles.append(section.title)
logger.info(f"章节已保存: {report_id}/section_{section_num:02d}.md")
# 更新进度
ReportManager.update_progress(
report_id, "generating",
base_progress + int(70 / total_sections),
f"章节 {section.title} 已完成",
current_section=None,
completed_sections=completed_section_titles
)
# 生成并保存子章节
for j, subsection in enumerate(section.subsections):
subsection_num = j + 1
if progress_callback:
progress_callback(
"generating",
base_progress + int(((j + 1) / len(section.subsections)) * 5),
f"正在生成子章节: {subsection.title}"
)
ReportManager.update_progress(
report_id, "generating",
base_progress + int(((j + 1) / len(section.subsections)) * 5),
f"正在生成子章节: {subsection.title}",
current_section=subsection.title,
completed_sections=completed_section_titles
)
subsection_content = self._generate_section_react(
section=subsection,
outline=outline,
previous_sections=generated_sections,
progress_callback=None
)
subsection.content = subsection_content
generated_sections.append(f"### {subsection.title}\n\n{subsection_content}")
# 【关键】立即保存子章节到文件
ReportManager.save_section(
report_id, subsection_num, subsection,
is_subsection=True, parent_index=section_num
)
completed_section_titles.append(f" └─ {subsection.title}")
logger.info(f"子章节已保存: {report_id}/section_{section_num:02d}_{subsection_num:02d}.md")
# 阶段3: 组装完整报告
if progress_callback:
progress_callback("generating", 95, "正在组装完整报告...")
ReportManager.update_progress(
report_id, "generating", 95, "正在组装完整报告...",
completed_sections=completed_section_titles
)
# 使用ReportManager组装完整报告
report.markdown_content = ReportManager.assemble_full_report(report_id, outline)
report.status = ReportStatus.COMPLETED
report.completed_at = datetime.now().isoformat()
# 保存最终报告
ReportManager.save_report(report)
ReportManager.update_progress(
report_id, "completed", 100, "报告生成完成",
completed_sections=completed_section_titles
)
if progress_callback:
progress_callback("completed", 100, "报告生成完成")
logger.info(f"报告生成完成: {report_id}")
return report
except Exception as e:
logger.error(f"报告生成失败: {str(e)}")
report.status = ReportStatus.FAILED
report.error = str(e)
# 保存失败状态
try:
ReportManager.save_report(report)
ReportManager.update_progress(
report_id, "failed", -1, f"报告生成失败: {str(e)}",
completed_sections=completed_section_titles
)
except Exception:
pass # 忽略保存失败的错误
return report
def chat(
self,
message: str,
chat_history: List[Dict[str, str]] = None
) -> Dict[str, Any]:
"""
与Report Agent对话
在对话中Agent可以自主调用检索工具来回答问题
Args:
message: 用户消息
chat_history: 对话历史
Returns:
{
"response": "Agent回复",
"tool_calls": [调用的工具列表],
"sources": [信息来源]
}
"""
logger.info(f"Report Agent对话: {message[:50]}...")
chat_history = chat_history or []
system_prompt = f"""你是一个专业的舆情分析助手,负责回答关于模拟分析报告的问题。
模拟需求: {self.simulation_requirement}
图谱ID: {self.graph_id}
你可以使用以下工具来获取信息:
{self._get_tools_description()}
工具调用格式:
<tool_call>
{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}}
</tool_call>
回答要求:
1. 基于事实和数据回答
2. 引用具体信息来源
3. 如果不确定,说明信息限制
4. 保持专业和客观"""
# 构建消息
messages = [{"role": "system", "content": system_prompt}]
# 添加历史对话
for h in chat_history[-10:]: # 限制历史长度
messages.append(h)
messages.append({"role": "user", "content": message})
# ReACT循环
tool_calls_made = []
max_iterations = 3
for iteration in range(max_iterations):
response = self.llm.chat(
messages=messages,
temperature=0.5,
max_tokens=2048
)
# 解析工具调用
tool_calls = self._parse_tool_calls(response)
if not tool_calls:
# 没有工具调用,返回响应
# 清理响应中的工具调用标记
clean_response = re.sub(r'<tool_call>.*?</tool_call>', '', response, flags=re.DOTALL)
clean_response = re.sub(r'\[TOOL_CALL\].*?\)', '', clean_response)
return {
"response": clean_response.strip(),
"tool_calls": tool_calls_made,
"sources": []
}
# 执行工具调用
tool_results = []
for call in tool_calls:
result = self._execute_tool(call["name"], call.get("parameters", {}))
tool_results.append({
"tool": call["name"],
"result": result[:1000] # 限制长度
})
tool_calls_made.append(call)
# 将结果添加到消息
messages.append({"role": "assistant", "content": response})
observation = "工具调用结果:\n" + "\n\n".join([
f"[{r['tool']}]: {r['result']}" for r in tool_results
])
messages.append({"role": "user", "content": observation + "\n\n请基于以上信息回答问题。"})
# 达到最大迭代,获取最终响应
final_response = self.llm.chat(
messages=messages,
temperature=0.5,
max_tokens=2048
)
return {
"response": final_response,
"tool_calls": tool_calls_made,
"sources": []
}
class ReportManager:
"""
报告管理器
负责报告的持久化存储和检索
文件结构(分章节输出):
reports/
{report_id}/
meta.json - 报告元信息和状态
outline.json - 报告大纲
progress.json - 生成进度
section_01.md - 第1章节
section_02.md - 第2章节
...
full_report.md - 完整报告
"""
# 报告存储目录
REPORTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'reports')
@classmethod
def _ensure_reports_dir(cls):
"""确保报告根目录存在"""
os.makedirs(cls.REPORTS_DIR, exist_ok=True)
@classmethod
def _get_report_folder(cls, report_id: str) -> str:
"""获取报告文件夹路径"""
return os.path.join(cls.REPORTS_DIR, report_id)
@classmethod
def _ensure_report_folder(cls, report_id: str) -> str:
"""确保报告文件夹存在并返回路径"""
folder = cls._get_report_folder(report_id)
os.makedirs(folder, exist_ok=True)
return folder
@classmethod
def _get_report_path(cls, report_id: str) -> str:
"""获取报告元信息文件路径"""
return os.path.join(cls._get_report_folder(report_id), "meta.json")
@classmethod
def _get_report_markdown_path(cls, report_id: str) -> str:
"""获取完整报告Markdown文件路径"""
return os.path.join(cls._get_report_folder(report_id), "full_report.md")
@classmethod
def _get_outline_path(cls, report_id: str) -> str:
"""获取大纲文件路径"""
return os.path.join(cls._get_report_folder(report_id), "outline.json")
@classmethod
def _get_progress_path(cls, report_id: str) -> str:
"""获取进度文件路径"""
return os.path.join(cls._get_report_folder(report_id), "progress.json")
@classmethod
def _get_section_path(cls, report_id: str, section_index: int) -> str:
"""获取章节Markdown文件路径"""
return os.path.join(cls._get_report_folder(report_id), f"section_{section_index:02d}.md")
@classmethod
def save_outline(cls, report_id: str, outline: ReportOutline) -> None:
"""
保存报告大纲
在规划阶段完成后立即调用
"""
cls._ensure_report_folder(report_id)
with open(cls._get_outline_path(report_id), 'w', encoding='utf-8') as f:
json.dump(outline.to_dict(), f, ensure_ascii=False, indent=2)
logger.info(f"大纲已保存: {report_id}")
@classmethod
def save_section(
cls,
report_id: str,
section_index: int,
section: ReportSection,
is_subsection: bool = False,
parent_index: int = None
) -> str:
"""
保存单个章节
在每个章节生成完成后立即调用,实现分章节输出
Args:
report_id: 报告ID
section_index: 章节索引从1开始
section: 章节对象
is_subsection: 是否是子章节
parent_index: 父章节索引(子章节时使用)
Returns:
保存的文件路径
"""
cls._ensure_report_folder(report_id)
# 确定章节级别和标题格式
if is_subsection and parent_index is not None:
level = "###"
file_suffix = f"section_{parent_index:02d}_{section_index:02d}.md"
else:
level = "##"
file_suffix = f"section_{section_index:02d}.md"
# 构建章节Markdown内容
md_content = f"{level} {section.title}\n\n"
if section.content:
md_content += f"{section.content}\n\n"
# 保存文件
file_path = os.path.join(cls._get_report_folder(report_id), file_suffix)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(md_content)
logger.info(f"章节已保存: {report_id}/{file_suffix}")
return file_path
@classmethod
def update_progress(
cls,
report_id: str,
status: str,
progress: int,
message: str,
current_section: str = None,
completed_sections: List[str] = None
) -> None:
"""
更新报告生成进度
前端可以通过读取progress.json获取实时进度
"""
cls._ensure_report_folder(report_id)
progress_data = {
"status": status,
"progress": progress,
"message": message,
"current_section": current_section,
"completed_sections": completed_sections or [],
"updated_at": datetime.now().isoformat()
}
with open(cls._get_progress_path(report_id), 'w', encoding='utf-8') as f:
json.dump(progress_data, f, ensure_ascii=False, indent=2)
@classmethod
def get_progress(cls, report_id: str) -> Optional[Dict[str, Any]]:
"""获取报告生成进度"""
path = cls._get_progress_path(report_id)
if not os.path.exists(path):
return None
with open(path, 'r', encoding='utf-8') as f:
return json.load(f)
@classmethod
def get_generated_sections(cls, report_id: str) -> List[Dict[str, Any]]:
"""
获取已生成的章节列表
返回所有已保存的章节文件信息
"""
folder = cls._get_report_folder(report_id)
if not os.path.exists(folder):
return []
sections = []
for filename in sorted(os.listdir(folder)):
if filename.startswith('section_') and filename.endswith('.md'):
file_path = os.path.join(folder, filename)
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
# 从文件名解析章节索引
parts = filename.replace('.md', '').split('_')
section_index = int(parts[1])
subsection_index = int(parts[2]) if len(parts) > 2 else None
sections.append({
"filename": filename,
"section_index": section_index,
"subsection_index": subsection_index,
"content": content,
"is_subsection": subsection_index is not None
})
return sections
@classmethod
def assemble_full_report(cls, report_id: str, outline: ReportOutline) -> str:
"""
组装完整报告
从已保存的章节文件组装完整报告
"""
folder = cls._get_report_folder(report_id)
# 构建报告头部
md_content = f"# {outline.title}\n\n"
md_content += f"> {outline.summary}\n\n"
md_content += f"---\n\n"
# 按顺序读取所有章节文件
sections = cls.get_generated_sections(report_id)
for section_info in sections:
md_content += section_info["content"]
# 保存完整报告
full_path = cls._get_report_markdown_path(report_id)
with open(full_path, 'w', encoding='utf-8') as f:
f.write(md_content)
logger.info(f"完整报告已组装: {report_id}")
return md_content
@classmethod
def save_report(cls, report: Report) -> None:
"""保存报告元信息和完整报告"""
cls._ensure_report_folder(report.report_id)
# 保存元信息JSON
with open(cls._get_report_path(report.report_id), 'w', encoding='utf-8') as f:
json.dump(report.to_dict(), f, ensure_ascii=False, indent=2)
# 保存大纲
if report.outline:
cls.save_outline(report.report_id, report.outline)
# 保存完整Markdown报告
if report.markdown_content:
with open(cls._get_report_markdown_path(report.report_id), 'w', encoding='utf-8') as f:
f.write(report.markdown_content)
logger.info(f"报告已保存: {report.report_id}")
@classmethod
def get_report(cls, report_id: str) -> Optional[Report]:
"""获取报告"""
path = cls._get_report_path(report_id)
if not os.path.exists(path):
# 兼容旧格式检查直接存储在reports目录下的文件
old_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.json")
if os.path.exists(old_path):
path = old_path
else:
return None
with open(path, 'r', encoding='utf-8') as f:
data = json.load(f)
# 重建Report对象
outline = None
if data.get('outline'):
outline_data = data['outline']
sections = []
for s in outline_data.get('sections', []):
subsections = [
ReportSection(title=sub['title'], content=sub.get('content', ''))
for sub in s.get('subsections', [])
]
sections.append(ReportSection(
title=s['title'],
content=s.get('content', ''),
subsections=subsections
))
outline = ReportOutline(
title=outline_data['title'],
summary=outline_data['summary'],
sections=sections
)
# 如果markdown_content为空尝试从full_report.md读取
markdown_content = data.get('markdown_content', '')
if not markdown_content:
full_report_path = cls._get_report_markdown_path(report_id)
if os.path.exists(full_report_path):
with open(full_report_path, 'r', encoding='utf-8') as f:
markdown_content = f.read()
return Report(
report_id=data['report_id'],
simulation_id=data['simulation_id'],
graph_id=data['graph_id'],
simulation_requirement=data['simulation_requirement'],
status=ReportStatus(data['status']),
outline=outline,
markdown_content=markdown_content,
created_at=data.get('created_at', ''),
completed_at=data.get('completed_at', ''),
error=data.get('error')
)
@classmethod
def get_report_by_simulation(cls, simulation_id: str) -> Optional[Report]:
"""根据模拟ID获取报告"""
cls._ensure_reports_dir()
for item in os.listdir(cls.REPORTS_DIR):
item_path = os.path.join(cls.REPORTS_DIR, item)
# 新格式:文件夹
if os.path.isdir(item_path):
report = cls.get_report(item)
if report and report.simulation_id == simulation_id:
return report
# 兼容旧格式JSON文件
elif item.endswith('.json'):
report_id = item[:-5]
report = cls.get_report(report_id)
if report and report.simulation_id == simulation_id:
return report
return None
@classmethod
def list_reports(cls, simulation_id: Optional[str] = None, limit: int = 50) -> List[Report]:
"""列出报告"""
cls._ensure_reports_dir()
reports = []
for item in os.listdir(cls.REPORTS_DIR):
item_path = os.path.join(cls.REPORTS_DIR, item)
# 新格式:文件夹
if os.path.isdir(item_path):
report = cls.get_report(item)
if report:
if simulation_id is None or report.simulation_id == simulation_id:
reports.append(report)
# 兼容旧格式JSON文件
elif item.endswith('.json'):
report_id = item[:-5]
report = cls.get_report(report_id)
if report:
if simulation_id is None or report.simulation_id == simulation_id:
reports.append(report)
# 按创建时间倒序
reports.sort(key=lambda r: r.created_at, reverse=True)
return reports[:limit]
@classmethod
def delete_report(cls, report_id: str) -> bool:
"""删除报告(整个文件夹)"""
import shutil
folder_path = cls._get_report_folder(report_id)
# 新格式:删除整个文件夹
if os.path.exists(folder_path) and os.path.isdir(folder_path):
shutil.rmtree(folder_path)
logger.info(f"报告文件夹已删除: {report_id}")
return True
# 兼容旧格式:删除单独的文件
deleted = False
old_json_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.json")
old_md_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.md")
if os.path.exists(old_json_path):
os.remove(old_json_path)
deleted = True
if os.path.exists(old_md_path):
os.remove(old_md_path)
deleted = True
return deleted