MiroFish/backend/app/services/simulation_config_generator.py
666ghj af5c235695 Enhance OASIS simulation capabilities and profile generation
- Updated README.md to include detailed descriptions of new features, including Zep mixed search functionality and detailed persona generation for individual and group entities.
- Implemented a robust mechanism for checking simulation preparation status to avoid redundant profile generation.
- Added support for parallel profile generation, improving efficiency in creating OASIS Agent Profiles.
- Enhanced the simulation configuration generator to adopt a stepwise approach, ensuring better handling of complex configurations.
- Introduced error handling and retry mechanisms for LLM calls, improving the reliability of profile generation.
- Updated simulation management to support new API parameters for controlling profile generation behavior.
2025-12-01 19:40:07 +08:00

809 lines
30 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
模拟配置智能生成器
使用LLM根据模拟需求、文档内容、图谱信息自动生成细致的模拟参数
实现全程自动化,无需人工设置参数
采用分步生成策略,避免一次性生成过长内容导致失败:
1. 生成时间配置
2. 生成事件配置
3. 分批生成Agent配置
4. 生成平台配置
"""
import json
import math
from typing import Dict, Any, List, Optional, Callable
from dataclasses import dataclass, field, asdict
from datetime import datetime
from openai import OpenAI
from ..config import Config
from ..utils.logger import get_logger
from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.simulation_config')
# Chinese daily-activity schedule (Beijing time).
CHINA_TIMEZONE_CONFIG = {
    # Small hours (almost nobody active)
    "dead_hours": [0, 1, 2, 3, 4, 5],
    # Early morning (people gradually waking up)
    "morning_hours": [6, 7, 8],
    # Working hours
    "work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
    # Evening peak (most active period)
    "peak_hours": [19, 20, 21, 22],
    # Late night (activity tapering off)
    "night_hours": [23],
    # Activity multiplier applied for each period
    "activity_multipliers": {
        "dead": 0.05,    # almost nobody in the small hours
        "morning": 0.4,  # gradually getting active
        "work": 0.7,     # moderate during work hours
        "peak": 1.5,     # evening peak
        "night": 0.5     # late-night decline
    }
}
@dataclass
class AgentActivityConfig:
    """Activity configuration for a single agent."""
    agent_id: int
    entity_uuid: str
    entity_name: str
    entity_type: str
    # Overall activity level (0.0-1.0)
    activity_level: float = 0.5
    # Expected posts per simulated hour
    posts_per_hour: float = 1.0
    # Expected comments per simulated hour
    comments_per_hour: float = 2.0
    # Active hours of the day (24h clock, values 0-23)
    active_hours: List[int] = field(default_factory=lambda: list(range(8, 23)))
    # Reaction delay to trending events, in simulated minutes
    response_delay_min: int = 5
    response_delay_max: int = 60
    # Sentiment bias (-1.0 negative .. 1.0 positive)
    sentiment_bias: float = 0.0
    # Stance on the simulated topic: supportive, opposing, neutral, observer
    stance: str = "neutral"
    # Influence weight: how likely this agent's posts are seen by other agents
    influence_weight: float = 1.0
@dataclass
class TimeSimulationConfig:
    """Time-simulation configuration (based on typical Chinese daily routine)."""
    # Total simulated duration, in simulated hours (default 72h = 3 days)
    total_simulation_hours: int = 72
    # Simulated minutes represented by one round
    minutes_per_round: int = 30
    # Range of agents activated per simulated hour
    agents_per_hour_min: int = 5
    agents_per_hour_max: int = 20
    # Evening peak (19:00-22:00, the most active period in China)
    peak_hours: List[int] = field(default_factory=lambda: [19, 20, 21, 22])
    peak_activity_multiplier: float = 1.5
    # Small hours (00:00-05:00, almost nobody active)
    off_peak_hours: List[int] = field(default_factory=lambda: [0, 1, 2, 3, 4, 5])
    off_peak_activity_multiplier: float = 0.05  # extremely low overnight activity
    # Early morning
    morning_hours: List[int] = field(default_factory=lambda: [6, 7, 8])
    morning_activity_multiplier: float = 0.4
    # Working hours
    work_hours: List[int] = field(default_factory=lambda: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18])
    work_activity_multiplier: float = 0.7
@dataclass
class EventConfig:
    """Event configuration for the simulation."""
    # Seed posts published when the simulation starts
    initial_posts: List[Dict[str, Any]] = field(default_factory=list)
    # Events triggered at specific simulated times
    scheduled_events: List[Dict[str, Any]] = field(default_factory=list)
    # Trending-topic keywords
    hot_topics: List[str] = field(default_factory=list)
    # Intended direction of public-opinion development
    narrative_direction: str = ""
@dataclass
class PlatformConfig:
    """Platform-specific configuration."""
    platform: str  # "twitter" or "reddit"
    # Recommendation-algorithm weights
    recency_weight: float = 0.4      # freshness
    popularity_weight: float = 0.3   # popularity
    relevance_weight: float = 0.3    # relevance
    # Number of interactions after which a post starts spreading virally
    viral_threshold: int = 10
    # Echo-chamber strength (how strongly similar opinions cluster)
    echo_chamber_strength: float = 0.5
@dataclass
class SimulationParameters:
    """Complete set of simulation parameters produced by the generator."""
    # Basic identifiers
    simulation_id: str
    project_id: str
    graph_id: str
    simulation_requirement: str
    # Time configuration
    time_config: TimeSimulationConfig = field(default_factory=TimeSimulationConfig)
    # Per-agent activity configurations
    agent_configs: List[AgentActivityConfig] = field(default_factory=list)
    # Event configuration
    event_config: EventConfig = field(default_factory=EventConfig)
    # Platform configurations (None when the platform is disabled)
    twitter_config: Optional[PlatformConfig] = None
    reddit_config: Optional[PlatformConfig] = None
    # LLM settings used during generation
    llm_model: str = ""
    llm_base_url: str = ""
    # Generation metadata
    generated_at: str = field(default_factory=lambda: datetime.now().isoformat())
    generation_reasoning: str = ""  # reasoning notes reported by the LLM steps

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict representation (nested dataclasses converted).

        Uses dataclasses.asdict so that newly added fields are serialized
        automatically; the previous hand-written key list had to be kept in
        sync with the field declarations by hand, which is a drift hazard.
        asdict yields the same keys in the same (field-declaration) order,
        with nested dataclasses converted to dicts and None preserved.
        """
        return asdict(self)

    def to_json(self, indent: int = 2) -> str:
        """Serialize to a JSON string (non-ASCII characters preserved)."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent)
class SimulationConfigGenerator:
    """
    Intelligent simulation-config generator.

    Uses an LLM to analyse the simulation requirement, the document content
    and the graph entity information, then automatically produces a suitable
    simulation-parameter configuration.

    Stepwise generation strategy:
    1. generate the time and event configurations (lightweight)
    2. generate agent configurations in batches (10-15 per batch)
    3. generate the platform configurations
    """
    # Maximum number of characters fed to the LLM as context
    MAX_CONTEXT_LENGTH = 50000
    # Number of agents generated per LLM batch
    AGENTS_PER_BATCH = 15

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model_name: Optional[str] = None
    ):
        # Fall back to application-wide LLM settings when not provided.
        self.api_key = api_key or Config.LLM_API_KEY
        self.base_url = base_url or Config.LLM_BASE_URL
        self.model_name = model_name or Config.LLM_MODEL_NAME
        if not self.api_key:
            raise ValueError("LLM_API_KEY 未配置")
        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url
        )
    def generate_config(
        self,
        simulation_id: str,
        project_id: str,
        graph_id: str,
        simulation_requirement: str,
        document_text: str,
        entities: List[EntityNode],
        enable_twitter: bool = True,
        enable_reddit: bool = True,
        progress_callback: Optional[Callable[[int, int, str], None]] = None,
    ) -> SimulationParameters:
        """
        Generate the full simulation configuration (stepwise).

        Args:
            simulation_id: simulation ID
            project_id: project ID
            graph_id: graph ID
            simulation_requirement: description of the simulation requirement
            document_text: raw document content
            entities: filtered entity list
            enable_twitter: whether the Twitter platform is enabled
            enable_reddit: whether the Reddit platform is enabled
            progress_callback: progress hook (current_step, total_steps, message)

        Returns:
            SimulationParameters: the complete parameter set
        """
        logger.info(f"开始智能生成模拟配置: simulation_id={simulation_id}, 实体数={len(entities)}")
        # Total steps: time config + event config + N agent batches + platform config
        num_batches = math.ceil(len(entities) / self.AGENTS_PER_BATCH)
        total_steps = 3 + num_batches
        current_step = 0

        def report_progress(step: int, message: str):
            # Forward progress to the optional callback and the log.
            nonlocal current_step
            current_step = step
            if progress_callback:
                progress_callback(step, total_steps, message)
            logger.info(f"[{step}/{total_steps}] {message}")
        # 1. Build the shared context string used by every LLM prompt
        context = self._build_context(
            simulation_requirement=simulation_requirement,
            document_text=document_text,
            entities=entities
        )
        reasoning_parts = []
        # ========== Step 1: time configuration ==========
        report_progress(1, "生成时间配置...")
        time_config_result = self._generate_time_config(context, len(entities))
        time_config = self._parse_time_config(time_config_result)
        reasoning_parts.append(f"时间配置: {time_config_result.get('reasoning', '成功')}")
        # ========== Step 2: event configuration ==========
        report_progress(2, "生成事件配置和热点话题...")
        event_config_result = self._generate_event_config(context, simulation_requirement)
        event_config = self._parse_event_config(event_config_result)
        reasoning_parts.append(f"事件配置: {event_config_result.get('reasoning', '成功')}")
        # ========== Steps 3..N: agent configurations, generated batch by batch ==========
        all_agent_configs = []
        for batch_idx in range(num_batches):
            start_idx = batch_idx * self.AGENTS_PER_BATCH
            end_idx = min(start_idx + self.AGENTS_PER_BATCH, len(entities))
            batch_entities = entities[start_idx:end_idx]
            report_progress(
                3 + batch_idx,
                f"生成Agent配置 ({start_idx + 1}-{end_idx}/{len(entities)})..."
            )
            batch_configs = self._generate_agent_configs_batch(
                context=context,
                entities=batch_entities,
                start_idx=start_idx,
                simulation_requirement=simulation_requirement
            )
            all_agent_configs.extend(batch_configs)
        reasoning_parts.append(f"Agent配置: 成功生成 {len(all_agent_configs)}")
        # ========== Final step: platform configurations (fixed presets) ==========
        report_progress(total_steps, "生成平台配置...")
        twitter_config = None
        reddit_config = None
        if enable_twitter:
            twitter_config = PlatformConfig(
                platform="twitter",
                recency_weight=0.4,
                popularity_weight=0.3,
                relevance_weight=0.3,
                viral_threshold=10,
                echo_chamber_strength=0.5
            )
        if enable_reddit:
            reddit_config = PlatformConfig(
                platform="reddit",
                recency_weight=0.3,
                popularity_weight=0.4,
                relevance_weight=0.3,
                viral_threshold=15,
                echo_chamber_strength=0.6
            )
        # Assemble the final parameter object
        params = SimulationParameters(
            simulation_id=simulation_id,
            project_id=project_id,
            graph_id=graph_id,
            simulation_requirement=simulation_requirement,
            time_config=time_config,
            agent_configs=all_agent_configs,
            event_config=event_config,
            twitter_config=twitter_config,
            reddit_config=reddit_config,
            llm_model=self.model_name,
            llm_base_url=self.base_url,
            generation_reasoning=" | ".join(reasoning_parts)
        )
        logger.info(f"模拟配置生成完成: {len(params.agent_configs)} 个Agent配置")
        return params
def _build_context(
self,
simulation_requirement: str,
document_text: str,
entities: List[EntityNode]
) -> str:
"""构建LLM上下文截断到最大长度"""
# 实体摘要
entity_summary = self._summarize_entities(entities)
# 构建上下文
context_parts = [
f"## 模拟需求\n{simulation_requirement}",
f"\n## 实体信息 ({len(entities)}个)\n{entity_summary}",
]
current_length = sum(len(p) for p in context_parts)
remaining_length = self.MAX_CONTEXT_LENGTH - current_length - 500 # 留500字符余量
if remaining_length > 0 and document_text:
doc_text = document_text[:remaining_length]
if len(document_text) > remaining_length:
doc_text += "\n...(文档已截断)"
context_parts.append(f"\n## 原始文档内容\n{doc_text}")
return "\n".join(context_parts)
def _summarize_entities(self, entities: List[EntityNode]) -> str:
"""生成实体摘要"""
lines = []
# 按类型分组
by_type: Dict[str, List[EntityNode]] = {}
for e in entities:
t = e.get_entity_type() or "Unknown"
if t not in by_type:
by_type[t] = []
by_type[t].append(e)
for entity_type, type_entities in by_type.items():
lines.append(f"\n### {entity_type} ({len(type_entities)}个)")
for e in type_entities[:10]: # 每类最多显示10个
summary_preview = (e.summary[:100] + "...") if len(e.summary) > 100 else e.summary
lines.append(f"- {e.name}: {summary_preview}")
if len(type_entities) > 10:
lines.append(f" ... 还有 {len(type_entities) - 10}")
return "\n".join(lines)
def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
"""带重试的LLM调用包含JSON修复逻辑"""
import re
max_attempts = 3
last_error = None
for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
# 不设置max_tokens让LLM自由发挥
)
content = response.choices[0].message.content
finish_reason = response.choices[0].finish_reason
# 检查是否被截断
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1})")
content = self._fix_truncated_json(content)
# 尝试解析JSON
try:
return json.loads(content)
except json.JSONDecodeError as e:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}")
# 尝试修复JSON
fixed = self._try_fix_config_json(content)
if fixed:
return fixed
last_error = e
except Exception as e:
logger.warning(f"LLM调用失败 (attempt {attempt+1}): {str(e)[:80]}")
last_error = e
import time
time.sleep(2 * (attempt + 1))
raise last_error or Exception("LLM调用失败")
def _fix_truncated_json(self, content: str) -> str:
"""修复被截断的JSON"""
content = content.strip()
# 计算未闭合的括号
open_braces = content.count('{') - content.count('}')
open_brackets = content.count('[') - content.count(']')
# 检查是否有未闭合的字符串
if content and content[-1] not in '",}]':
content += '"'
# 闭合括号
content += ']' * open_brackets
content += '}' * open_braces
return content
def _try_fix_config_json(self, content: str) -> Optional[Dict[str, Any]]:
"""尝试修复配置JSON"""
import re
# 修复被截断的情况
content = self._fix_truncated_json(content)
# 提取JSON部分
json_match = re.search(r'\{[\s\S]*\}', content)
if json_match:
json_str = json_match.group()
# 移除字符串中的换行符
def fix_string(match):
s = match.group(0)
s = s.replace('\n', ' ').replace('\r', ' ')
s = re.sub(r'\s+', ' ', s)
return s
json_str = re.sub(r'"[^"\\]*(?:\\.[^"\\]*)*"', fix_string, json_str)
try:
return json.loads(json_str)
except:
# 尝试移除所有控制字符
json_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', json_str)
json_str = re.sub(r'\s+', ' ', json_str)
try:
return json.loads(json_str)
except:
pass
return None
    def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, Any]:
        """Generate the time configuration via the LLM.

        Falls back to the rule-based default configuration when the LLM
        call fails.
        """
        prompt = f"""基于以下模拟需求,生成时间模拟配置。
{context[:5000]}
## 任务
请生成时间配置JSON注意
- 用户群体为中国人,需符合北京时间作息习惯
- 凌晨0-5点几乎无人活动活跃度系数0.05
- 早上6-8点逐渐活跃活跃度系数0.4
- 工作时间9-18点中等活跃活跃度系数0.7
- 晚间19-22点是高峰期活跃度系数1.5
- 23点后活跃度下降活跃度系数0.5
当前实体数量: {num_entities}
返回JSON格式不要markdown
{{
"total_simulation_hours": <72-168根据事件性质决定>,
"minutes_per_round": <15-60>,
"agents_per_hour_min": <每小时最少激活Agent数>,
"agents_per_hour_max": <每小时最多激活Agent数>,
"peak_hours": [19, 20, 21, 22],
"off_peak_hours": [0, 1, 2, 3, 4, 5],
"morning_hours": [6, 7, 8],
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
"reasoning": "<简要说明>"
}}"""
        system_prompt = "你是社交媒体模拟专家。返回纯JSON格式时间配置需符合中国人作息习惯。"
        try:
            return self._call_llm_with_retry(prompt, system_prompt)
        except Exception as e:
            logger.warning(f"时间配置LLM生成失败: {e}, 使用默认配置")
            return self._get_default_time_config(num_entities)
def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]:
"""获取默认时间配置(中国人作息)"""
return {
"total_simulation_hours": 72,
"minutes_per_round": 30,
"agents_per_hour_min": max(1, num_entities // 15),
"agents_per_hour_max": max(5, num_entities // 5),
"peak_hours": [19, 20, 21, 22],
"off_peak_hours": [0, 1, 2, 3, 4, 5],
"morning_hours": [6, 7, 8],
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
"reasoning": "使用默认中国人作息配置"
}
def _parse_time_config(self, result: Dict[str, Any]) -> TimeSimulationConfig:
"""解析时间配置结果"""
return TimeSimulationConfig(
total_simulation_hours=result.get("total_simulation_hours", 72),
minutes_per_round=result.get("minutes_per_round", 30),
agents_per_hour_min=result.get("agents_per_hour_min", 5),
agents_per_hour_max=result.get("agents_per_hour_max", 20),
peak_hours=result.get("peak_hours", [19, 20, 21, 22]),
off_peak_hours=result.get("off_peak_hours", [0, 1, 2, 3, 4, 5]),
off_peak_activity_multiplier=0.05, # 凌晨几乎无人
morning_hours=result.get("morning_hours", [6, 7, 8]),
morning_activity_multiplier=0.4,
work_hours=result.get("work_hours", list(range(9, 19))),
work_activity_multiplier=0.7,
peak_activity_multiplier=1.5
)
    def _generate_event_config(self, context: str, simulation_requirement: str) -> Dict[str, Any]:
        """Generate the event configuration via the LLM.

        Falls back to an empty default configuration when the LLM call fails.
        """
        prompt = f"""基于以下模拟需求,生成事件配置。
模拟需求: {simulation_requirement}
{context[:3000]}
## 任务
请生成事件配置JSON
- 提取热点话题关键词
- 描述舆论发展方向
- 设计初始帖子内容
返回JSON格式不要markdown
{{
"hot_topics": ["关键词1", "关键词2", ...],
"narrative_direction": "<舆论发展方向描述>",
"initial_posts": [
{{"content": "帖子内容", "poster_type": "MediaOutlet"}},
...
],
"reasoning": "<简要说明>"
}}"""
        system_prompt = "你是舆论分析专家。返回纯JSON格式。"
        try:
            return self._call_llm_with_retry(prompt, system_prompt)
        except Exception as e:
            logger.warning(f"事件配置LLM生成失败: {e}, 使用默认配置")
            return {
                "hot_topics": [],
                "narrative_direction": "",
                "initial_posts": [],
                "reasoning": "使用默认配置"
            }
def _parse_event_config(self, result: Dict[str, Any]) -> EventConfig:
"""解析事件配置结果"""
return EventConfig(
initial_posts=result.get("initial_posts", []),
scheduled_events=[],
hot_topics=result.get("hot_topics", []),
narrative_direction=result.get("narrative_direction", "")
)
    def _generate_agent_configs_batch(
        self,
        context: str,
        entities: List[EntityNode],
        start_idx: int,
        simulation_requirement: str
    ) -> List[AgentActivityConfig]:
        """Generate activity configs for one batch of entities.

        Args:
            context: shared LLM context string (unused in the prompt itself
                but kept for interface symmetry with the other generators).
            entities: the entities in this batch.
            start_idx: global index of the first entity, used to derive
                stable agent_ids across batches.
            simulation_requirement: description of the simulation requirement.

        Returns:
            One AgentActivityConfig per entity; LLM-generated where possible,
            rule-based otherwise.
        """
        # Compact entity descriptions handed to the LLM.
        entity_list = []
        for i, e in enumerate(entities):
            entity_list.append({
                "agent_id": start_idx + i,
                "entity_name": e.name,
                "entity_type": e.get_entity_type() or "Unknown",
                "summary": e.summary[:150] if e.summary else ""
            })
        prompt = f"""基于以下信息,为每个实体生成社交媒体活动配置。
模拟需求: {simulation_requirement}
## 实体列表
```json
{json.dumps(entity_list, ensure_ascii=False, indent=2)}
```
## 任务
为每个实体生成活动配置,注意:
- **时间符合中国人作息**凌晨0-5点几乎不活动晚间19-22点最活跃
- **官方机构**University/GovernmentAgency活跃度低(0.1-0.3),工作时间(9-17)活动,响应慢(60-240分钟),影响力高(2.5-3.0)
- **媒体**MediaOutlet活跃度中(0.4-0.6),全天活动(8-23),响应快(5-30分钟),影响力高(2.0-2.5)
- **个人**Student/Person/Alumni活跃度高(0.6-0.9),主要晚间活动(18-23),响应快(1-15分钟),影响力低(0.8-1.2)
- **公众人物/专家**:活跃度中(0.4-0.6),影响力中高(1.5-2.0)
返回JSON格式不要markdown
{{
"agent_configs": [
{{
"agent_id": <必须与输入一致>,
"activity_level": <0.0-1.0>,
"posts_per_hour": <发帖频率>,
"comments_per_hour": <评论频率>,
"active_hours": [<活跃小时列表,考虑中国人作息>],
"response_delay_min": <最小响应延迟分钟>,
"response_delay_max": <最大响应延迟分钟>,
"sentiment_bias": <-1.0到1.0>,
"stance": "<supportive/opposing/neutral/observer>",
"influence_weight": <影响力权重>
}},
...
]
}}"""
        system_prompt = "你是社交媒体行为分析专家。返回纯JSON配置需符合中国人作息习惯。"
        try:
            result = self._call_llm_with_retry(prompt, system_prompt)
            # Index the returned configs by agent_id for per-agent lookup.
            llm_configs = {cfg["agent_id"]: cfg for cfg in result.get("agent_configs", [])}
        except Exception as e:
            logger.warning(f"Agent配置批次LLM生成失败: {e}, 使用规则生成")
            llm_configs = {}
        # Build AgentActivityConfig objects, filling gaps with rule-based values.
        configs = []
        for i, entity in enumerate(entities):
            agent_id = start_idx + i
            cfg = llm_configs.get(agent_id, {})
            # Rule-based fallback when the LLM skipped this agent.
            if not cfg:
                cfg = self._generate_agent_config_by_rule(entity)
            config = AgentActivityConfig(
                agent_id=agent_id,
                entity_uuid=entity.uuid,
                entity_name=entity.name,
                entity_type=entity.get_entity_type() or "Unknown",
                activity_level=cfg.get("activity_level", 0.5),
                posts_per_hour=cfg.get("posts_per_hour", 0.5),
                comments_per_hour=cfg.get("comments_per_hour", 1.0),
                active_hours=cfg.get("active_hours", list(range(9, 23))),
                response_delay_min=cfg.get("response_delay_min", 5),
                response_delay_max=cfg.get("response_delay_max", 60),
                sentiment_bias=cfg.get("sentiment_bias", 0.0),
                stance=cfg.get("stance", "neutral"),
                influence_weight=cfg.get("influence_weight", 1.0)
            )
            configs.append(config)
        return configs
def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]:
"""基于规则生成单个Agent配置中国人作息"""
entity_type = (entity.get_entity_type() or "Unknown").lower()
if entity_type in ["university", "governmentagency", "ngo"]:
# 官方机构:工作时间活动,低频率,高影响力
return {
"activity_level": 0.2,
"posts_per_hour": 0.1,
"comments_per_hour": 0.05,
"active_hours": list(range(9, 18)), # 9:00-17:59
"response_delay_min": 60,
"response_delay_max": 240,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 3.0
}
elif entity_type in ["mediaoutlet"]:
# 媒体:全天活动,中等频率,高影响力
return {
"activity_level": 0.5,
"posts_per_hour": 0.8,
"comments_per_hour": 0.3,
"active_hours": list(range(7, 24)), # 7:00-23:59
"response_delay_min": 5,
"response_delay_max": 30,
"sentiment_bias": 0.0,
"stance": "observer",
"influence_weight": 2.5
}
elif entity_type in ["professor", "expert", "official"]:
# 专家/教授:工作+晚间活动,中等频率
return {
"activity_level": 0.4,
"posts_per_hour": 0.3,
"comments_per_hour": 0.5,
"active_hours": list(range(8, 22)), # 8:00-21:59
"response_delay_min": 15,
"response_delay_max": 90,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 2.0
}
elif entity_type in ["student"]:
# 学生:晚间为主,高频率
return {
"activity_level": 0.8,
"posts_per_hour": 0.6,
"comments_per_hour": 1.5,
"active_hours": [8, 9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # 上午+晚间
"response_delay_min": 1,
"response_delay_max": 15,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 0.8
}
elif entity_type in ["alumni"]:
# 校友:晚间为主
return {
"activity_level": 0.6,
"posts_per_hour": 0.4,
"comments_per_hour": 0.8,
"active_hours": [12, 13, 19, 20, 21, 22, 23], # 午休+晚间
"response_delay_min": 5,
"response_delay_max": 30,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 1.0
}
else:
# 普通人:晚间高峰
return {
"activity_level": 0.7,
"posts_per_hour": 0.5,
"comments_per_hour": 1.2,
"active_hours": [9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # 白天+晚间
"response_delay_min": 2,
"response_delay_max": 20,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 1.0
}