From 7601d78fd4d9edb60b7455e5fa16508a00a2d3a3 Mon Sep 17 00:00:00 2001
From: 666ghj <670939375@qq.com>
Date: Sat, 14 Feb 2026 16:56:48 +0800
Subject: [PATCH] feat(report_agent): enhance interview text processing and
response handling; improve quote extraction and formatting for better clarity
---
backend/app/services/zep_tools.py | 145 ++++++++++++++++++------
frontend/src/components/Step4Report.vue | 144 ++++++++++++++---------
2 files changed, 205 insertions(+), 84 deletions(-)
diff --git a/backend/app/services/zep_tools.py b/backend/app/services/zep_tools.py
index 4bd2896..1694632 100644
--- a/backend/app/services/zep_tools.py
+++ b/backend/app/services/zep_tools.py
@@ -308,7 +308,30 @@ class AgentInterview:
if self.key_quotes:
text += "\n**关键引言:**\n"
for quote in self.key_quotes:
- text += f"> \"{quote}\"\n"
+ # 清理各种引号
+ clean_quote = quote.replace('\u201c', '').replace('\u201d', '').replace('"', '')
+ clean_quote = clean_quote.replace('\u300c', '').replace('\u300d', '')
+ clean_quote = clean_quote.strip()
+ # 去掉开头的标点
+ while clean_quote and clean_quote[0] in ',,;;::、。!?\n\r\t ':
+ clean_quote = clean_quote[1:]
+ # 过滤包含问题编号的垃圾内容(问题1-9)
+ skip = False
+ for d in '123456789':
+ if f'\u95ee\u9898{d}' in clean_quote:
+ skip = True
+ break
+ if skip:
+ continue
+ # 截断过长内容(按句号截断,而非硬截断)
+ if len(clean_quote) > 150:
+ dot_pos = clean_quote.find('\u3002', 80)
+ if dot_pos > 0:
+ clean_quote = clean_quote[:dot_pos + 1]
+ else:
+ clean_quote = clean_quote[:147] + "..."
+ if clean_quote and len(clean_quote) >= 10:
+ text += f'> "{clean_quote}"\n'
return text
@@ -350,27 +373,26 @@ class InterviewResult:
def to_text(self) -> str:
"""转换为详细的文本格式,供LLM理解和报告引用"""
text_parts = [
- f"## 🎤 深度采访报告",
+ "## 深度采访报告",  # plain literal: the line has no placeholders, so the f-prefix is dropped
f"**采访主题:** {self.interview_topic}",
f"**采访人数:** {self.interviewed_count} / {self.total_agents} 位模拟Agent",
- f"\n### 采访对象选择理由",
- f"{self.selection_reasoning}",
- f"\n---"
+ "\n### 采访对象选择理由",
+ self.selection_reasoning or "(自动选择)",  # placeholder when selection_reasoning is falsy
+ "\n---",
+ "\n### 采访实录",  # transcript heading is part of the fixed header, emitted even with no interviews
]
-
- # 各Agent的采访内容
+
if self.interviews:
- text_parts.append(f"\n### 采访实录")
for i, interview in enumerate(self.interviews, 1):
text_parts.append(f"\n#### 采访 #{i}: {interview.agent_name}")
text_parts.append(interview.to_text())
text_parts.append("\n---")
-
- # 采访摘要
- if self.summary:
- text_parts.append(f"\n### 采访摘要与核心观点")
- text_parts.append(self.summary)
-
+ else:
+ text_parts.append("(无采访记录)\n\n---")  # placeholder body plus separator when there are no interviews
+
+ text_parts.append("\n### 采访摘要与核心观点")  # summary heading is always appended
+ text_parts.append(self.summary or "(无摘要)")  # placeholder when summary is falsy
+
return "\n".join(text_parts)
@@ -1329,8 +1351,18 @@ class ZepToolsService:
# 将问题合并为一个采访prompt
combined_prompt = "\n".join([f"{i+1}. {q}" for i, q in enumerate(result.interview_questions)])
- # 添加优化前缀,避免Agent调用工具而直接回复文本
- INTERVIEW_PROMPT_PREFIX = "结合你的人设、所有的过往记忆与行动,不调用任何工具直接用文本回复我:"
+ # 添加优化前缀,约束Agent回复格式
+ INTERVIEW_PROMPT_PREFIX = (
+ "你正在接受一次采访。请结合你的人设、所有的过往记忆与行动,"
+ "以纯文本方式直接回答以下问题。\n"
+ "回复要求:\n"
+ "1. 直接用自然语言回答,不要调用任何工具\n"
+ "2. 不要返回JSON格式或工具调用格式\n"
+ "3. 不要使用Markdown标题(如#、##、###)\n"
+ "4. 按问题编号逐一回答,每个回答以「问题X:」开头(X为问题编号)\n"
+ "5. 每个问题的回答之间用空行分隔\n"
+ "6. 回答要有实质内容,每个问题至少回答2-3句话\n\n"
+ )
optimized_prompt = f"{INTERVIEW_PROMPT_PREFIX}{combined_prompt}"
# Step 4: 调用真实的采访API(不指定platform,默认双平台同时采访)
@@ -1380,26 +1412,43 @@ class ZepToolsService:
twitter_response = twitter_result.get("response", "")
reddit_response = reddit_result.get("response", "")
-
- # 合并两个平台的回答
- response_parts = []
- if twitter_response:
- response_parts.append(f"【Twitter平台回答】\n{twitter_response}")
- if reddit_response:
- response_parts.append(f"【Reddit平台回答】\n{reddit_response}")
-
- if response_parts:
- response_text = "\n\n".join(response_parts)
- else:
- response_text = "[无回复]"
-
+
+ # 清理可能的工具调用 JSON 包裹
+ twitter_response = self._clean_tool_call_response(twitter_response)
+ reddit_response = self._clean_tool_call_response(reddit_response)
+
+ # 始终输出双平台标记
+ twitter_text = twitter_response if twitter_response else "(该平台未获得回复)"
+ reddit_text = reddit_response if reddit_response else "(该平台未获得回复)"
+ response_text = f"【Twitter平台回答】\n{twitter_text}\n\n【Reddit平台回答】\n{reddit_text}"
+
# 提取关键引言(从两个平台的回答中)
import re
combined_responses = f"{twitter_response} {reddit_response}"
- key_quotes = re.findall(r'[""「」『』]([^""「」『』]{10,100})[""「」『』]', combined_responses)
+
+ # 清理响应文本:去掉标记、编号、Markdown 等干扰
+ clean_text = re.sub(r'#{1,6}\s+', '', combined_responses)
+ clean_text = re.sub(r'\{[^}]*tool_name[^}]*\}', '', clean_text)
+ clean_text = re.sub(r'[*_`|>~\-]{2,}', '', clean_text)
+ clean_text = re.sub(r'问题\d+[::]\s*', '', clean_text)
+ clean_text = re.sub(r'【[^】]+】', '', clean_text)
+
+ # 策略1(主): 提取完整的有实质内容的句子
+ sentences = re.split(r'[。!?]', clean_text)
+ meaningful = [
+ s.strip() for s in sentences
+ if 20 <= len(s.strip()) <= 150
+ and not re.match(r'^[\s\W,,;;::、]+', s.strip())
+ and not s.strip().startswith(('{', '问题'))
+ ]
+ meaningful.sort(key=len, reverse=True)
+ key_quotes = [s + "。" for s in meaningful[:3]]
+
+ # 策略2(补充): 正确配对的中文引号「」内长文本
if not key_quotes:
- sentences = combined_responses.split('。')
- key_quotes = [s.strip() + '。' for s in sentences if len(s.strip()) > 20][:3]
+ paired = re.findall(r'\u201c([^\u201c\u201d]{15,100})\u201d', clean_text)
+ paired += re.findall(r'\u300c([^\u300c\u300d]{15,100})\u300d', clean_text)
+ key_quotes = [q for q in paired if not re.match(r'^[,,;;::、]', q)][:3]
interview = AgentInterview(
agent_name=agent_name,
@@ -1435,6 +1484,27 @@ class ZepToolsService:
logger.info(f"InterviewAgents完成: 采访了 {result.interviewed_count} 个Agent(双平台)")
return result
+ @staticmethod
+ def _clean_tool_call_response(response: str) -> str:
+ """Unwrap a JSON tool-call envelope from an agent reply and return the inner text; a reply that does not look like a tool call is returned unchanged."""
+ if not response or not response.strip().startswith('{'):  # empty, or not shaped like a JSON object
+ return response
+ text = response.strip()
+ if 'tool_name' not in text[:80]:  # the marker is only looked for in the first 80 characters
+ return response
+ import re as _re
+ try:
+ data = json.loads(text)
+ if isinstance(data, dict) and 'arguments' in data:
+ for key in ('content', 'text', 'body', 'message', 'reply'):  # first key present wins
+ if key in data['arguments']:
+ return str(data['arguments'][key])
+ except (json.JSONDecodeError, KeyError, TypeError):
+ match = _re.search(r'"content"\s*:\s*"((?:[^"\\]|\\.)*)"', text)  # fallback: capture the "content" string value by regex
+ if match:
+ return match.group(1).replace('\\n', '\n').replace('\\"', '"')  # only the \n and \" escapes are decoded here
+ return response
+
def _load_agent_profiles(self, simulation_id: str) -> List[Dict[str, Any]]:
"""加载模拟的Agent人设文件"""
import os
@@ -1581,6 +1651,8 @@ class ZepToolsService:
2. 针对不同角色可能有不同答案
3. 涵盖事实、观点、感受等多个维度
4. 语言自然,像真实采访一样
+5. 每个问题控制在50字以内,简洁明了
+6. 直接提问,不要包含背景说明或前缀
返回JSON格式:{"questions": ["问题1", "问题2", ...]}"""
@@ -1633,7 +1705,14 @@ class ZepToolsService:
2. 指出观点的共识和分歧
3. 突出有价值的引言
4. 客观中立,不偏袒任何一方
-5. 控制在1000字内"""
+5. 控制在1000字内
+
+格式约束(必须遵守):
+- 使用纯文本段落,用空行分隔不同部分
+- 不要使用Markdown标题(如#、##、###)
+- 不要使用分割线(如---、***)
+- 引用受访者原话时使用中文引号「」
+- 可以使用**加粗**标记关键词,但不要使用其他Markdown语法"""
user_prompt = f"""采访主题:{interview_requirement}
diff --git a/frontend/src/components/Step4Report.vue b/frontend/src/components/Step4Report.vue
index 28d3cf1..f44aedc 100644
--- a/frontend/src/components/Step4Report.vue
+++ b/frontend/src/components/Step4Report.vue
@@ -849,27 +849,36 @@ const parseInterview = (text) => {
interview.redditAnswer = redditMatch[1].trim()
}
- // 如果只有一个平台的回答,将其作为主回答
- // 这样无论显示哪个平台都能有内容
+ // 平台回退逻辑(兼容旧格式:只有一个平台标记的情况)
if (!twitterMatch && redditMatch) {
- // 只有 Reddit 回答,将其也设为 twitterAnswer 作为默认显示
- interview.twitterAnswer = interview.redditAnswer
+ // 只有 Reddit 回答,仅在非占位文本时复制为默认显示
+ if (interview.redditAnswer && interview.redditAnswer !== '(该平台未获得回复)') {
+ interview.twitterAnswer = interview.redditAnswer
+ }
} else if (twitterMatch && !redditMatch) {
- // 只有 Twitter 回答,将其也设为 redditAnswer
- interview.redditAnswer = interview.twitterAnswer
+ if (interview.twitterAnswer && interview.twitterAnswer !== '(该平台未获得回复)') {
+ interview.redditAnswer = interview.twitterAnswer
+ }
} else if (!twitterMatch && !redditMatch) {
- // 如果没有明确分平台,整体作为回答
+ // 没有分平台标记(极旧格式),整体作为回答
interview.twitterAnswer = answerText
}
}
- // 提取关键引言
+ // 提取关键引言(兼容多种引号格式)
const quotesMatch = block.match(/\*\*关键引言:\*\*\n([\s\S]*?)(?=\n---|\n####|$)/)
if (quotesMatch) {
const quotesText = quotesMatch[1]
- const quoteMatches = quotesText.match(/> "([^"]+)"/g)
+ // 优先匹配 > "text" 格式
+ let quoteMatches = quotesText.match(/> "([^"]+)"/g)
+ // 回退:匹配 > "text" 或 > \u201Ctext\u201D(中文引号)
+ if (!quoteMatches) {
+ quoteMatches = quotesText.match(/> [\u201C""]([^\u201D""]+)[\u201D""]/g)
+ }
if (quoteMatches) {
- interview.quotes = quoteMatches.map(q => q.replace(/^> "|"$/g, '').trim())
+ interview.quotes = quoteMatches
+ .map(q => q.replace(/^> [\u201C""]|[\u201D""]$/g, '').trim())
+ .filter(q => q)
}
}
@@ -1314,79 +1323,100 @@ const InterviewDisplay = {
return text.substring(0, 400) + '...'
}
+ // Return true when text is falsy or, after trimming, equals one of the known "no reply" placeholder strings
+ const isPlaceholderText = (text) => {
+ if (!text) return true
+ const t = text.trim()
+ return t === '(该平台未获得回复)' || t === '(该平台未获得回复)' || t === '[无回复]' // NOTE(review): the first two literals are identical as shown; presumably one was a full-width-parenthesis variant, confirm
+ }
+
// 尝试按问题编号分割回答
const splitAnswerByQuestions = (answerText, questionCount) => {
if (!answerText || questionCount <= 0) return [answerText]
-
- // 更健壮的分割逻辑:查找所有 "数字." 格式的编号位置
- // 支持格式:
- // - "1. \n内容" (数字+点+空格+换行+内容)
- // - "\n\n2. \n内容" (换行+数字+点+空格+换行+内容)
- // 使用更宽松的匹配:开头或换行后的数字+点+空白
- const numberPattern = /(?:^|[\r\n]+)(\d+)\.\s+/g
- const matches = []
+ if (isPlaceholderText(answerText)) return [''] // a placeholder reply yields a single empty part
+
+ // Two numbering formats are recognised:
+ // 1. "问题X" followed by a colon (Chinese label; the format the backend prompt now asks for)
+ // 2. "1. " or "\n1. " (digits plus dot; kept for compatibility with the old format)
+ let matches = []
let match
-
- while ((match = numberPattern.exec(answerText)) !== null) {
+
+ // Try the "问题X:" format first
+ const cnPattern = /(?:^|[\r\n]+)问题(\d+)[::]\s*/g
+ while ((match = cnPattern.exec(answerText)) !== null) {
matches.push({
num: parseInt(match[1]),
index: match.index,
fullMatch: match[0]
})
}
-
+
+ // Nothing matched: fall back to the "digits." format
+ if (matches.length === 0) {
+ const numPattern = /(?:^|[\r\n]+)(\d+)\.\s+/g
+ while ((match = numPattern.exec(answerText)) !== null) {
+ matches.push({
+ num: parseInt(match[1]),
+ index: match.index,
+ fullMatch: match[0]
+ })
+ }
+ }
+
// 如果没有找到编号或只找到一个,返回整体
if (matches.length <= 1) {
- // 尝试移除开头的编号(格式:1. \n 或 1. )
- const cleaned = answerText.replace(/^\d+\.\s+/, '').trim()
+ const cleaned = answerText // strip one leading marker of either format, then trim
+ .replace(/^问题\d+[::]\s*/, '')
+ .replace(/^\d+\.\s+/, '')
+ .trim()
return [cleaned || answerText]
}
-
+
// 按编号提取各部分
const parts = []
for (let i = 0; i < matches.length; i++) {
const current = matches[i]
const next = matches[i + 1]
-
+
const startIdx = current.index + current.fullMatch.length
const endIdx = next ? next.index : answerText.length
-
+
let part = answerText.substring(startIdx, endIdx).trim()
- // 移除末尾可能的多余换行
part = part.replace(/[\r\n]+$/, '').trim()
parts.push(part)
}
-
- // 如果分割成功且数量合理,返回分割结果
+
if (parts.length > 0 && parts.some(p => p)) {
return parts
}
-
+
return [answerText]
}
// 获取某个问题对应的回答
const getAnswerForQuestion = (interview, qIdx, platform) => {
const answer = platform === 'twitter' ? interview.twitterAnswer : (interview.redditAnswer || interview.twitterAnswer)
- if (!answer) return ''
-
+ if (!answer || isPlaceholderText(answer)) return answer || '' // placeholder text is returned as-is without splitting; a missing answer becomes ''
+
const questionCount = interview.questions?.length || 1
const answers = splitAnswerByQuestions(answer, questionCount)
-
- // 如果只有一个回答部分,或者索引超出,返回完整回答
- if (answers.length === 1 || qIdx >= answers.length) {
- return qIdx === 0 ? answer : ''
+
+ // The split produced several parts and qIdx is within range
+ if (answers.length > 1 && qIdx < answers.length) {
+ return answers[qIdx] || ''
}
-
- return answers[qIdx] || ''
+
+ // Otherwise: the first question gets the whole answer, every other question gets ''
+ return qIdx === 0 ? answer : ''
}
- // 检查某个问题是否有双平台回答
+ // Check whether a question has answers from both platforms (placeholder text does not count)
const hasMultiplePlatforms = (interview, qIdx) => {
if (!interview.twitterAnswer || !interview.redditAnswer) return false
const twitterAnswer = getAnswerForQuestion(interview, qIdx, 'twitter')
const redditAnswer = getAnswerForQuestion(interview, qIdx, 'reddit')
- return twitterAnswer && redditAnswer && twitterAnswer !== redditAnswer
+ // True only when neither per-question answer is a placeholder and the two answers differ
+ return !isPlaceholderText(twitterAnswer) && !isPlaceholderText(redditAnswer) && twitterAnswer !== redditAnswer
}
return () => h('div', { class: 'interview-display' }, [
@@ -1453,7 +1483,8 @@ const InterviewDisplay = {
const hasDualPlatform = hasMultiplePlatforms(interview, qIdx)
const expandKey = `${activeIndex.value}-${qIdx}`
const isExpanded = expandedAnswers.value.has(expandKey)
-
+ const isPlaceholder = isPlaceholderText(answerText)
+
return h('div', { class: 'qa-pair', key: qIdx }, [
// Question Block
h('div', { class: 'qa-question' }, [
@@ -1463,14 +1494,14 @@ const InterviewDisplay = {
h('div', { class: 'qa-text' }, question)
])
]),
-
+
// Answer Block
- answerText && h('div', { class: 'qa-answer' }, [
+ answerText && h('div', { class: ['qa-answer', { 'answer-placeholder': isPlaceholder }] }, [
h('div', { class: 'qa-badge a-badge' }, `A${qIdx + 1}`),
h('div', { class: 'qa-content' }, [
h('div', { class: 'qa-answer-header' }, [
h('div', { class: 'qa-sender' }, interview?.name || 'Agent'),
- // 双平台切换按钮
+ // 双平台切换按钮(仅在有真实双平台回答时显示)
hasDualPlatform && h('div', { class: 'platform-switch' }, [
h('button', {
class: ['platform-btn', { active: currentPlatform === 'twitter' }],
@@ -1494,14 +1525,16 @@ const InterviewDisplay = {
])
])
]),
- h('div', {
- class: 'qa-text answer-text',
- innerHTML: formatAnswer(answerText, isExpanded)
- .replace(/\*\*(.+?)\*\*/g, '$1')
- .replace(/\n/g, '
')
+ h('div', {
+ class: ['qa-text', 'answer-text', { 'placeholder-text': isPlaceholder }],
+ innerHTML: isPlaceholder
+ ? answerText
+ : formatAnswer(answerText, isExpanded)
+ .replace(/\*\*(.+?)\*\*/g, '$1')
+ .replace(/\n/g, '
')
}),
- // Expand/Collapse Button
- answerText.length > 400 && h('button', {
+ // Expand/Collapse Button(占位文本不显示)
+ !isPlaceholder && answerText.length > 400 && h('button', {
class: 'expand-answer-btn',
onClick: () => toggleAnswer(expandKey)
}, isExpanded ? 'Show Less' : 'Show More')
@@ -3913,6 +3946,15 @@ watch(() => props.reportId, (newId) => {
margin-top: 0;
}
+:deep(.interview-display .answer-placeholder) {
+ opacity: 0.6;
+}
+
+:deep(.interview-display .placeholder-text) {
+ font-style: italic;
+ color: #9CA3AF;
+}
+
:deep(.interview-display .qa-answer-header) {
display: flex;
justify-content: space-between;