diff --git a/backend/app/services/zep_tools.py b/backend/app/services/zep_tools.py index 4bd2896..1694632 100644 --- a/backend/app/services/zep_tools.py +++ b/backend/app/services/zep_tools.py @@ -308,7 +308,30 @@ class AgentInterview: if self.key_quotes: text += "\n**关键引言:**\n" for quote in self.key_quotes: - text += f"> \"{quote}\"\n" + # 清理各种引号 + clean_quote = quote.replace('\u201c', '').replace('\u201d', '').replace('"', '') + clean_quote = clean_quote.replace('\u300c', '').replace('\u300d', '') + clean_quote = clean_quote.strip() + # 去掉开头的标点 + while clean_quote and clean_quote[0] in ',,;;::、。!?\n\r\t ': + clean_quote = clean_quote[1:] + # 过滤包含问题编号的垃圾内容(问题1-9) + skip = False + for d in '123456789': + if f'\u95ee\u9898{d}' in clean_quote: + skip = True + break + if skip: + continue + # 截断过长内容(按句号截断,而非硬截断) + if len(clean_quote) > 150: + dot_pos = clean_quote.find('\u3002', 80) + if dot_pos > 0: + clean_quote = clean_quote[:dot_pos + 1] + else: + clean_quote = clean_quote[:147] + "..." + if clean_quote and len(clean_quote) >= 10: + text += f'> "{clean_quote}"\n' return text @@ -350,27 +373,26 @@ class InterviewResult: def to_text(self) -> str: """转换为详细的文本格式,供LLM理解和报告引用""" text_parts = [ - f"## 🎤 深度采访报告", + "## 深度采访报告", f"**采访主题:** {self.interview_topic}", f"**采访人数:** {self.interviewed_count} / {self.total_agents} 位模拟Agent", - f"\n### 采访对象选择理由", - f"{self.selection_reasoning}", - f"\n---" + "\n### 采访对象选择理由", + self.selection_reasoning or "(自动选择)", + "\n---", + "\n### 采访实录", ] - - # 各Agent的采访内容 + if self.interviews: - text_parts.append(f"\n### 采访实录") for i, interview in enumerate(self.interviews, 1): text_parts.append(f"\n#### 采访 #{i}: {interview.agent_name}") text_parts.append(interview.to_text()) text_parts.append("\n---") - - # 采访摘要 - if self.summary: - text_parts.append(f"\n### 采访摘要与核心观点") - text_parts.append(self.summary) - + else: + text_parts.append("(无采访记录)\n\n---") + + text_parts.append("\n### 采访摘要与核心观点") + text_parts.append(self.summary or "(无摘要)") + return "\n".join(text_parts) @@ -1329,8 +1351,18 @@ class ZepToolsService: # 将问题合并为一个采访prompt combined_prompt = "\n".join([f"{i+1}. {q}" for i, q in enumerate(result.interview_questions)]) - # 添加优化前缀,避免Agent调用工具而直接回复文本 - INTERVIEW_PROMPT_PREFIX = "结合你的人设、所有的过往记忆与行动,不调用任何工具直接用文本回复我:" + # 添加优化前缀,约束Agent回复格式 + INTERVIEW_PROMPT_PREFIX = ( + "你正在接受一次采访。请结合你的人设、所有的过往记忆与行动," + "以纯文本方式直接回答以下问题。\n" + "回复要求:\n" + "1. 直接用自然语言回答,不要调用任何工具\n" + "2. 不要返回JSON格式或工具调用格式\n" + "3. 不要使用Markdown标题(如#、##、###)\n" + "4. 按问题编号逐一回答,每个回答以「问题X:」开头(X为问题编号)\n" + "5. 每个问题的回答之间用空行分隔\n" + "6. 回答要有实质内容,每个问题至少回答2-3句话\n\n" + ) optimized_prompt = f"{INTERVIEW_PROMPT_PREFIX}{combined_prompt}" # Step 4: 调用真实的采访API(不指定platform,默认双平台同时采访) @@ -1380,26 +1412,43 @@ class ZepToolsService: twitter_response = twitter_result.get("response", "") reddit_response = reddit_result.get("response", "") - - # 合并两个平台的回答 - response_parts = [] - if twitter_response: - response_parts.append(f"【Twitter平台回答】\n{twitter_response}") - if reddit_response: - response_parts.append(f"【Reddit平台回答】\n{reddit_response}") - - if response_parts: - response_text = "\n\n".join(response_parts) - else: - response_text = "[无回复]" - + + # 清理可能的工具调用 JSON 包裹 + twitter_response = self._clean_tool_call_response(twitter_response) + reddit_response = self._clean_tool_call_response(reddit_response) + + # 始终输出双平台标记 + twitter_text = twitter_response if twitter_response else "(该平台未获得回复)" + reddit_text = reddit_response if reddit_response else "(该平台未获得回复)" + response_text = f"【Twitter平台回答】\n{twitter_text}\n\n【Reddit平台回答】\n{reddit_text}" + # 提取关键引言(从两个平台的回答中) import re combined_responses = f"{twitter_response} {reddit_response}" - key_quotes = re.findall(r'[""「」『』]([^""「」『』]{10,100})[""「」『』]', combined_responses) + + # 清理响应文本:去掉标记、编号、Markdown 等干扰 + clean_text = re.sub(r'#{1,6}\s+', '', combined_responses) + clean_text = re.sub(r'\{[^}]*tool_name[^}]*\}', '', clean_text) + clean_text = re.sub(r'[*_`|>~\-]{2,}', '', clean_text) + clean_text = re.sub(r'问题\d+[::]\s*', '', clean_text) + clean_text = re.sub(r'【[^】]+】', '', clean_text) + + # 策略1(主): 提取完整的有实质内容的句子 + sentences = re.split(r'[。!?]', clean_text) + meaningful = [ + s.strip() for s in sentences + if 20 <= len(s.strip()) <= 150 + and not re.match(r'^[\s\W,,;;::、]+', s.strip()) + and not s.strip().startswith(('{', '问题')) + ] + meaningful.sort(key=len, reverse=True) + key_quotes = [s + "。" for s in meaningful[:3]] + + # 策略2(补充): 正确配对的中文引号「」内长文本 if not key_quotes: - sentences = combined_responses.split('。') - key_quotes = [s.strip() + '。' for s in sentences if len(s.strip()) > 20][:3] + paired = re.findall(r'\u201c([^\u201c\u201d]{15,100})\u201d', clean_text) + paired += re.findall(r'\u300c([^\u300c\u300d]{15,100})\u300d', clean_text) + key_quotes = [q for q in paired if not re.match(r'^[,,;;::、]', q)][:3] interview = AgentInterview( agent_name=agent_name, @@ -1435,6 +1484,27 @@ class ZepToolsService: logger.info(f"InterviewAgents完成: 采访了 {result.interviewed_count} 个Agent(双平台)") return result + @staticmethod + def _clean_tool_call_response(response: str) -> str: + """清理 Agent 回复中的 JSON 工具调用包裹,提取实际内容""" + if not response or not response.strip().startswith('{'): + return response + text = response.strip() + if 'tool_name' not in text[:80]: + return response + import re as _re + try: + data = json.loads(text) + if isinstance(data, dict) and 'arguments' in data: + for key in ('content', 'text', 'body', 'message', 'reply'): + if key in data['arguments']: + return str(data['arguments'][key]) + except (json.JSONDecodeError, KeyError, TypeError): + match = _re.search(r'"content"\s*:\s*"((?:[^"\\]|\\.)*)"', text) + if match: + return match.group(1).replace('\\n', '\n').replace('\\"', '"') + return response + def _load_agent_profiles(self, simulation_id: str) -> List[Dict[str, Any]]: """加载模拟的Agent人设文件""" import os @@ -1581,6 +1651,8 @@ class ZepToolsService: 2. 针对不同角色可能有不同答案 3. 涵盖事实、观点、感受等多个维度 4. 语言自然,像真实采访一样 +5. 每个问题控制在50字以内,简洁明了 +6. 直接提问,不要包含背景说明或前缀 返回JSON格式:{"questions": ["问题1", "问题2", ...]}""" @@ -1633,7 +1705,14 @@ class ZepToolsService: 2. 指出观点的共识和分歧 3. 突出有价值的引言 4. 客观中立,不偏袒任何一方 -5. 控制在1000字内""" +5. 控制在1000字内 + +格式约束(必须遵守): +- 使用纯文本段落,用空行分隔不同部分 +- 不要使用Markdown标题(如#、##、###) +- 不要使用分割线(如---、***) +- 引用受访者原话时使用中文引号「」 +- 可以使用**加粗**标记关键词,但不要使用其他Markdown语法""" user_prompt = f"""采访主题:{interview_requirement} diff --git a/frontend/src/components/Step4Report.vue b/frontend/src/components/Step4Report.vue index 28d3cf1..f44aedc 100644 --- a/frontend/src/components/Step4Report.vue +++ b/frontend/src/components/Step4Report.vue @@ -849,27 +849,36 @@ const parseInterview = (text) => { interview.redditAnswer = redditMatch[1].trim() } - // 如果只有一个平台的回答,将其作为主回答 - // 这样无论显示哪个平台都能有内容 + // 平台回退逻辑(兼容旧格式:只有一个平台标记的情况) if (!twitterMatch && redditMatch) { - // 只有 Reddit 回答,将其也设为 twitterAnswer 作为默认显示 - interview.twitterAnswer = interview.redditAnswer + // 只有 Reddit 回答,仅在非占位文本时复制为默认显示 + if (interview.redditAnswer && interview.redditAnswer !== '(该平台未获得回复)') { + interview.twitterAnswer = interview.redditAnswer + } } else if (twitterMatch && !redditMatch) { - // 只有 Twitter 回答,将其也设为 redditAnswer - interview.redditAnswer = interview.twitterAnswer + if (interview.twitterAnswer && interview.twitterAnswer !== '(该平台未获得回复)') { + interview.redditAnswer = interview.twitterAnswer + } } else if (!twitterMatch && !redditMatch) { - // 如果没有明确分平台,整体作为回答 + // 没有分平台标记(极旧格式),整体作为回答 interview.twitterAnswer = answerText } } - // 提取关键引言 + // 提取关键引言(兼容多种引号格式) const quotesMatch = block.match(/\*\*关键引言:\*\*\n([\s\S]*?)(?=\n---|\n####|$)/) if (quotesMatch) { const quotesText = quotesMatch[1] - const quoteMatches = quotesText.match(/> "([^"]+)"/g) + // 优先匹配 > "text" 格式 + let quoteMatches = quotesText.match(/> "([^"]+)"/g) + // 回退:匹配 > "text" 或 > \u201Ctext\u201D(中文引号) + if (!quoteMatches) { + quoteMatches = quotesText.match(/> [\u201C""]([^\u201D""]+)[\u201D""]/g) + } if (quoteMatches) { - interview.quotes = quoteMatches.map(q => q.replace(/^> "|"$/g, '').trim()) + interview.quotes = quoteMatches + .map(q => q.replace(/^> [\u201C""]|[\u201D""]$/g, '').trim()) + .filter(q => q) } } @@ -1314,79 +1323,100 @@ const InterviewDisplay = { return text.substring(0, 400) + '...' } + // 检查是否为平台占位文本 + const isPlaceholderText = (text) => { + if (!text) return true + const t = text.trim() + return t === '(该平台未获得回复)' || t === '(该平台未获得回复)' || t === '[无回复]' + } + // 尝试按问题编号分割回答 const splitAnswerByQuestions = (answerText, questionCount) => { if (!answerText || questionCount <= 0) return [answerText] - - // 更健壮的分割逻辑:查找所有 "数字." 格式的编号位置 - // 支持格式: - // - "1. \n内容" (数字+点+空格+换行+内容) - // - "\n\n2. \n内容" (换行+数字+点+空格+换行+内容) - // 使用更宽松的匹配:开头或换行后的数字+点+空白 - const numberPattern = /(?:^|[\r\n]+)(\d+)\.\s+/g - const matches = [] + if (isPlaceholderText(answerText)) return [''] + + // 支持两种编号格式: + // 1. "问题X:" 或 "问题X:" (中文格式,后端新格式) + // 2. "1. " 或 "\n1. " (数字+点,旧格式兼容) + let matches = [] let match - - while ((match = numberPattern.exec(answerText)) !== null) { + + // 优先尝试 "问题X:" 格式 + const cnPattern = /(?:^|[\r\n]+)问题(\d+)[::]\s*/g + while ((match = cnPattern.exec(answerText)) !== null) { matches.push({ num: parseInt(match[1]), index: match.index, fullMatch: match[0] }) } - + + // 如果没匹配到,回退到 "数字." 格式 + if (matches.length === 0) { + const numPattern = /(?:^|[\r\n]+)(\d+)\.\s+/g + while ((match = numPattern.exec(answerText)) !== null) { + matches.push({ + num: parseInt(match[1]), + index: match.index, + fullMatch: match[0] + }) + } + } + // 如果没有找到编号或只找到一个,返回整体 if (matches.length <= 1) { - // 尝试移除开头的编号(格式:1. \n 或 1. ) - const cleaned = answerText.replace(/^\d+\.\s+/, '').trim() + const cleaned = answerText + .replace(/^问题\d+[::]\s*/, '') + .replace(/^\d+\.\s+/, '') + .trim() return [cleaned || answerText] } - + // 按编号提取各部分 const parts = [] for (let i = 0; i < matches.length; i++) { const current = matches[i] const next = matches[i + 1] - + const startIdx = current.index + current.fullMatch.length const endIdx = next ? next.index : answerText.length - + let part = answerText.substring(startIdx, endIdx).trim() - // 移除末尾可能的多余换行 part = part.replace(/[\r\n]+$/, '').trim() parts.push(part) } - - // 如果分割成功且数量合理,返回分割结果 + if (parts.length > 0 && parts.some(p => p)) { return parts } - + return [answerText] } // 获取某个问题对应的回答 const getAnswerForQuestion = (interview, qIdx, platform) => { const answer = platform === 'twitter' ? interview.twitterAnswer : (interview.redditAnswer || interview.twitterAnswer) - if (!answer) return '' - + if (!answer || isPlaceholderText(answer)) return answer || '' + const questionCount = interview.questions?.length || 1 const answers = splitAnswerByQuestions(answer, questionCount) - - // 如果只有一个回答部分,或者索引超出,返回完整回答 - if (answers.length === 1 || qIdx >= answers.length) { - return qIdx === 0 ? answer : '' + + // 分割成功且索引有效 + if (answers.length > 1 && qIdx < answers.length) { + return answers[qIdx] || '' } - - return answers[qIdx] || '' + + // 分割失败:第一个问题返回完整回答,其余返回空 + return qIdx === 0 ? answer : '' } - // 检查某个问题是否有双平台回答 + // 检查某个问题是否有双平台回答(过滤占位文本) const hasMultiplePlatforms = (interview, qIdx) => { if (!interview.twitterAnswer || !interview.redditAnswer) return false const twitterAnswer = getAnswerForQuestion(interview, qIdx, 'twitter') const redditAnswer = getAnswerForQuestion(interview, qIdx, 'reddit') - return twitterAnswer && redditAnswer && twitterAnswer !== redditAnswer + // 两个平台都有真实回答(非占位文本)且内容不同 + return !isPlaceholderText(twitterAnswer) && !isPlaceholderText(redditAnswer) && twitterAnswer !== redditAnswer } return () => h('div', { class: 'interview-display' }, [ @@ -1453,7 +1483,8 @@ const InterviewDisplay = { const hasDualPlatform = hasMultiplePlatforms(interview, qIdx) const expandKey = `${activeIndex.value}-${qIdx}` const isExpanded = expandedAnswers.value.has(expandKey) - + const isPlaceholder = isPlaceholderText(answerText) + return h('div', { class: 'qa-pair', key: qIdx }, [ // Question Block h('div', { class: 'qa-question' }, [ @@ -1463,14 +1494,14 @@ const InterviewDisplay = { h('div', { class: 'qa-text' }, question) ]) ]), - + // Answer Block - answerText && h('div', { class: 'qa-answer' }, [ + answerText && h('div', { class: ['qa-answer', { 'answer-placeholder': isPlaceholder }] }, [ h('div', { class: 'qa-badge a-badge' }, `A${qIdx + 1}`), h('div', { class: 'qa-content' }, [ h('div', { class: 'qa-answer-header' }, [ h('div', { class: 'qa-sender' }, interview?.name || 'Agent'), - // 双平台切换按钮 + // 双平台切换按钮(仅在有真实双平台回答时显示) hasDualPlatform && h('div', { class: 'platform-switch' }, [ h('button', { class: ['platform-btn', { active: currentPlatform === 'twitter' }], @@ -1494,14 +1525,16 @@ const InterviewDisplay = { ]) ]) ]), - h('div', { - class: 'qa-text answer-text', - innerHTML: formatAnswer(answerText, isExpanded) - .replace(/\*\*(.+?)\*\*/g, '$1') - .replace(/\n/g, '
') + h('div', { + class: ['qa-text', 'answer-text', { 'placeholder-text': isPlaceholder }], + innerHTML: isPlaceholder + ? answerText + : formatAnswer(answerText, isExpanded) + .replace(/\*\*(.+?)\*\*/g, '$1') + .replace(/\n/g, '
') }), - // Expand/Collapse Button - answerText.length > 400 && h('button', { + // Expand/Collapse Button(占位文本不显示) + !isPlaceholder && answerText.length > 400 && h('button', { class: 'expand-answer-btn', onClick: () => toggleAnswer(expandKey) }, isExpanded ? 'Show Less' : 'Show More') @@ -3913,6 +3946,15 @@ watch(() => props.reportId, (newId) => { margin-top: 0; } +:deep(.interview-display .answer-placeholder) { + opacity: 0.6; +} + +:deep(.interview-display .placeholder-text) { + font-style: italic; + color: #9CA3AF; +} + :deep(.interview-display .qa-answer-header) { display: flex; justify-content: space-between;