# MiroFish/backend/scripts/run_parallel_simulation.py

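"""
Preset script for running the OASIS simulation on two platforms in parallel.
Runs the Twitter and Reddit simulations concurrently, both reading the same
configuration file.

Usage:
    python run_parallel_simulation.py --config simulation_config.json [--action-log actions.jsonl] [--twitter-only | --reddit-only]
"""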

import argparse
import asyncio
import json
import logging
import os
import random
import sys
from datetime import datetime
from typing import Dict, Any, List, Optional

# Put the scripts directory and the backend directory on the import path.
# This script is expected to live in backend/scripts/, so the backend
# directory is its parent and the project root is the parent of that.
_scripts_dir = os.path.dirname(os.path.abspath(__file__))
_backend_dir = os.path.abspath(os.path.join(_scripts_dir, '..'))
_project_root = os.path.abspath(os.path.join(_backend_dir, '..'))
# Both are inserted at position 0, so _backend_dir ends up ahead of
# _scripts_dir in sys.path.
sys.path.insert(0, _scripts_dir)
sys.path.insert(0, _backend_dir)

# Load the .env file from the project root (create_model() in this file reads
# LLM_API_KEY, LLM_BASE_URL and LLM_MODEL_NAME from the environment).
from dotenv import load_dotenv
_env_file = os.path.join(_project_root, '.env')
if os.path.exists(_env_file):
    load_dotenv(_env_file)
    print(f"已加载环境配置: {_env_file}")
else:
    # Fall back to backend/.env when the project root has no .env file.
    _backend_env = os.path.join(_backend_dir, '.env')
    if os.path.exists(_backend_env):
        load_dotenv(_backend_env)
        print(f"已加载环境配置: {_backend_env}")


class UnicodeFormatter(logging.Formatter):
    """
    Formatter that turns literal ``\\uXXXX`` escape sequences in the formatted
    log text into the characters they denote.

    A high/low surrogate pair written as two consecutive escapes (the form
    ``json.dumps`` with ``ensure_ascii=True`` uses for characters outside the
    Basic Multilingual Plane, e.g. emoji) is merged into the single code point
    it encodes. A surrogate escape without a matching partner is left as the
    original escape text, because a lone surrogate character cannot be encoded
    as UTF-8 and would make writing the record to a UTF-8 file fail.
    """

    # Compiled lazily by _get_pattern(). Group 1 holds the four hex digits of
    # an escape; group 2, when present, holds the hex digits of a low-surrogate
    # escape (DC00-DFFF) that immediately follows it.
    UNICODE_ESCAPE_PATTERN = None

    @classmethod
    def _get_pattern(cls):
        """Return the shared escape-sequence regex, compiling it on first use."""
        if cls.UNICODE_ESCAPE_PATTERN is None:
            import re
            cls.UNICODE_ESCAPE_PATTERN = re.compile(
                r'\\u([0-9a-fA-F]{4})'
                r'(?:\\u([dD][c-fC-F][0-9a-fA-F]{2}))?'
            )
        return cls.UNICODE_ESCAPE_PATTERN

    def format(self, record):
        """Format *record* normally, then decode escape sequences in the text."""
        text = super().format(record)

        def replace_unicode(match):
            code = int(match.group(1), 16)
            trailing = match.group(2)

            # High surrogate directly followed by a low surrogate: combine the
            # two halves into the real code point.
            if trailing is not None and 0xD800 <= code <= 0xDBFF:
                low = int(trailing, 16)
                return chr(0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00))

            # Unpaired surrogate: keep the escape text exactly as it was.
            if 0xD800 <= code <= 0xDFFF:
                return match.group(0)

            if trailing is None:
                return chr(code)

            # An ordinary escape followed by a stray low surrogate: decode the
            # first one and keep the second as text. The first escape is always
            # six characters long (backslash, "u", four hex digits).
            return chr(code) + match.group(0)[6:]

        return self._get_pattern().sub(replace_unicode, text)


def setup_oasis_logging(log_dir: str):
    """
    Route the OASIS loggers to fixed-name log files inside *log_dir*.

    Every handler currently attached to the loggers configured here is
    removed and closed, every existing ``*.log`` file directly inside
    *log_dir* is deleted (best effort), and each logger then gets a single
    UTF-8 ``FileHandler`` writing to ``<logger name>.log``. The loggers are
    set to DEBUG and stop propagating to their parents.

    Args:
        log_dir: Directory for the log files; created if it does not exist.
    """
    os.makedirs(log_dir, exist_ok=True)

    # Fixed (timestamp-free) file name for each logger that is reconfigured.
    loggers_config = {
        "social.agent": os.path.join(log_dir, "social.agent.log"),
        "social.twitter": os.path.join(log_dir, "social.twitter.log"),
        "social.rec": os.path.join(log_dir, "social.rec.log"),
        "oasis.env": os.path.join(log_dir, "oasis.env.log"),
        "table": os.path.join(log_dir, "table.log"),
    }

    # Detach AND close the handlers already installed on these loggers.
    # Merely clearing the handler list would leave their files open; closing
    # first also releases any log file inside log_dir before it is deleted.
    for logger_name in loggers_config:
        logger = logging.getLogger(logger_name)
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
            handler.close()

    # Remove log files left over from earlier runs; failures are ignored
    # because the cleanup is best effort.
    for entry in os.listdir(log_dir):
        old_log = os.path.join(log_dir, entry)
        if os.path.isfile(old_log) and entry.endswith('.log'):
            try:
                os.remove(old_log)
            except OSError:
                pass

    # One formatter shared by all handlers; it decodes \uXXXX escapes.
    formatter = UnicodeFormatter(
        "%(levelname)s - %(asctime)s - %(name)s - %(message)s"
    )

    for logger_name, log_file in loggers_config.items():
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.DEBUG)
        # mode='w' truncates the file so each run starts with an empty log.
        file_handler = logging.FileHandler(log_file, encoding='utf-8', mode='w')
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        # Keep records out of ancestor loggers to avoid duplicate output.
        logger.propagate = False

    print(f"日志配置完成，日志目录: {log_dir}")


def init_logging_for_simulation(simulation_dir: str):
    """Configure OASIS logging under the ``log`` subdirectory of *simulation_dir*."""
    setup_oasis_logging(os.path.join(simulation_dir, "log"))


# Imported by bare module name; presumably resolved through the sys.path
# entries inserted near the top of this file — verify against the repo layout.
from action_logger import ActionLogger

# Simulation dependencies. If any of them cannot be imported, print an
# installation hint and exit with status 1 instead of showing a traceback.
try:
    from camel.models import ModelFactory
    from camel.types import ModelPlatformType
    import oasis
    from oasis import (
        ActionType,
        LLMAction,
        ManualAction,
        generate_twitter_agent_graph,
        generate_reddit_agent_graph
    )
except ImportError as e:
    print(f"错误: 缺少依赖 {e}")
    print("请先安装: pip install oasis-ai camel-ai")
    sys.exit(1)


# Actions available on Twitter; passed as available_actions when the
# Twitter agent graph is generated.
TWITTER_ACTIONS = [
    ActionType.CREATE_POST,
    ActionType.LIKE_POST,
    ActionType.REPOST,
    ActionType.FOLLOW,
    ActionType.DO_NOTHING,
    ActionType.QUOTE_POST,
]

# Actions available on Reddit; passed as available_actions when the
# Reddit agent graph is generated.
REDDIT_ACTIONS = [
    ActionType.LIKE_POST,
    ActionType.DISLIKE_POST,
    ActionType.CREATE_POST,
    ActionType.CREATE_COMMENT,
    ActionType.LIKE_COMMENT,
    ActionType.DISLIKE_COMMENT,
    ActionType.SEARCH_POSTS,
    ActionType.SEARCH_USER,
    ActionType.TREND,
    ActionType.REFRESH,
    ActionType.DO_NOTHING,
    ActionType.FOLLOW,
    ActionType.MUTE,
]


def load_config(config_path: str) -> Dict[str, Any]:
    """Read the UTF-8 JSON file at *config_path* and return its parsed content."""
    with open(config_path, 'r', encoding='utf-8') as config_file:
        raw_text = config_file.read()
    return json.loads(raw_text)


def create_model(config: Dict[str, Any]):
    """
    Build the LLM model object used by the simulation.

    Settings are taken from environment variables first:
    - LLM_API_KEY: API key, copied to OPENAI_API_KEY when set
    - LLM_BASE_URL: API base URL, copied to OPENAI_API_BASE_URL when set
    - LLM_MODEL_NAME: model name; when empty, ``config["llm_model"]`` is used,
      defaulting to "gpt-4o-mini"

    Args:
        config: Parsed simulation configuration; only ``llm_model`` is read.

    Returns:
        The object returned by ``ModelFactory.create`` for the OpenAI platform.

    Raises:
        ValueError: If OPENAI_API_KEY is still unset after applying LLM_API_KEY.
    """
    environ = os.environ
    api_key_from_env = environ.get("LLM_API_KEY", "")
    llm_base_url = environ.get("LLM_BASE_URL", "")
    # The environment wins; the config file is only a fallback for the model name.
    llm_model = environ.get("LLM_MODEL_NAME", "") or config.get("llm_model", "gpt-4o-mini")

    # Export the key under the variable name the OpenAI platform reads.
    if api_key_from_env:
        environ["OPENAI_API_KEY"] = api_key_from_env

    # A key exported before the script started is accepted as well.
    if not environ.get("OPENAI_API_KEY"):
        raise ValueError("缺少 API Key 配置，请在项目根目录 .env 文件中设置 LLM_API_KEY")

    if llm_base_url:
        environ["OPENAI_API_BASE_URL"] = llm_base_url

    print(f"LLM配置: model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else '默认'}...")

    factory_kwargs = {
        "model_platform": ModelPlatformType.OPENAI,
        "model_type": llm_model,
    }
    return ModelFactory.create(**factory_kwargs)


def get_active_agents_for_round(
    env,
    config: Dict[str, Any],
    current_hour: int,
    round_num: int
) -> List:
    """
    Pick the agents that act during the current round.

    A target head-count is drawn uniformly between ``agents_per_hour_min`` and
    ``agents_per_hour_max`` and scaled by the peak / off-peak multiplier for
    *current_hour*. Each configured agent whose ``active_hours`` contain
    *current_hour* becomes a candidate with probability ``activity_level``;
    up to the target number of candidates is then sampled at random.

    Args:
        env: Environment whose ``agent_graph.get_agent(agent_id)`` resolves ids.
        config: Simulation configuration (``time_config``, ``agent_configs``).
        current_hour: Simulated hour of day used for the schedule checks.
        round_num: Accepted for the caller's convenience; not used here.

    Returns:
        A list of ``(agent_id, agent)`` tuples. Ids that cannot be resolved
        through the agent graph are skipped.
    """
    timing = config.get("time_config", {})
    per_agent_settings = config.get("agent_configs", [])

    lower_bound = timing.get("agents_per_hour_min", 5)
    upper_bound = timing.get("agents_per_hour_max", 20)

    if current_hour in timing.get("peak_hours", [9, 10, 11, 14, 15, 20, 21, 22]):
        scale = timing.get("peak_activity_multiplier", 1.5)
    elif current_hour in timing.get("off_peak_hours", [0, 1, 2, 3, 4, 5]):
        scale = timing.get("off_peak_activity_multiplier", 0.3)
    else:
        scale = 1.0

    quota = int(random.uniform(lower_bound, upper_bound) * scale)

    # The activity roll only happens for agents scheduled at this hour
    # (``and`` short-circuits), so the random draws stay in config order.
    eligible_ids = [
        entry.get("agent_id", 0)
        for entry in per_agent_settings
        if current_hour in entry.get("active_hours", list(range(8, 23)))
        and random.random() < entry.get("activity_level", 0.5)
    ]

    if not eligible_ids:
        return []

    chosen = []
    for picked_id in random.sample(eligible_ids, min(quota, len(eligible_ids))):
        try:
            member = env.agent_graph.get_agent(picked_id)
        except Exception:
            # Unknown ids are dropped silently.
            continue
        chosen.append((picked_id, member))

    return chosen


async def run_twitter_simulation(
    config: Dict[str, Any],
    simulation_dir: str,
    action_logger: Optional[ActionLogger] = None
):
    """
    Run the Twitter side of the simulation.

    Builds the agent graph from ``twitter_profiles.csv`` in *simulation_dir*,
    recreates ``twitter_simulation.db`` there, publishes the configured
    initial posts, and then steps the OASIS environment once per simulated
    round with an ``LLMAction`` for every agent activated in that round.
    The environment is closed even if a step raises.

    Args:
        config: Parsed simulation configuration. ``event_config.initial_posts``
            and ``time_config`` are read here; the dict is also handed to
            ``create_model`` and ``get_active_agents_for_round``.
        simulation_dir: Directory that holds the profile file and receives
            the database file.
        action_logger: Optional logger for simulation, round and action
            events. Actions are counted for the final summary whether or not
            a logger is supplied.

    Returns:
        None. Returns early, after printing an error, when the profile file
        does not exist.
    """
    print("[Twitter] 初始化...")

    model = create_model(config)

    # The Twitter agent graph is generated from a CSV profile file.
    profile_path = os.path.join(simulation_dir, "twitter_profiles.csv")
    if not os.path.exists(profile_path):
        print(f"[Twitter] 错误: Profile文件不存在: {profile_path}")
        return

    agent_graph = await generate_twitter_agent_graph(
        profile_path=profile_path,
        model=model,
        available_actions=TWITTER_ACTIONS,
    )

    # Display names for the action log; fall back to "Agent_<id>".
    agent_names = {}
    for agent_id, agent in agent_graph.get_agents():
        agent_names[agent_id] = getattr(agent, 'name', f'Agent_{agent_id}')

    # Start every run from a fresh database file.
    db_path = os.path.join(simulation_dir, "twitter_simulation.db")
    if os.path.exists(db_path):
        os.remove(db_path)

    env = oasis.make(
        agent_graph=agent_graph,
        platform=oasis.DefaultPlatformType.TWITTER,
        database_path=db_path,
    )

    await env.reset()
    print("[Twitter] 环境已启动")

    total_actions = 0

    try:
        if action_logger:
            action_logger.log_simulation_start("twitter", config)

        # Publish the initial posts.
        event_config = config.get("event_config", {})
        initial_posts = event_config.get("initial_posts", [])

        if initial_posts:
            initial_actions = {}
            posted_count = 0
            for post in initial_posts:
                agent_id = post.get("poster_agent_id", 0)
                content = post.get("content", "")
                try:
                    agent = env.agent_graph.get_agent(agent_id)
                    new_action = ManualAction(
                        action_type=ActionType.CREATE_POST,
                        action_args={"content": content}
                    )
                    # An agent may author several initial posts. Promote its
                    # entry to a list instead of overwriting the earlier post
                    # (the same shape run_reddit_simulation hands to env.step).
                    if agent in initial_actions:
                        if not isinstance(initial_actions[agent], list):
                            initial_actions[agent] = [initial_actions[agent]]
                        initial_actions[agent].append(new_action)
                    else:
                        initial_actions[agent] = new_action
                    posted_count += 1
                    total_actions += 1

                    if action_logger:
                        action_logger.log_action(
                            round_num=0,
                            platform="twitter",
                            agent_id=agent_id,
                            agent_name=agent_names.get(agent_id, f"Agent_{agent_id}"),
                            action_type="CREATE_POST",
                            action_args={"content": content[:100] + "..." if len(content) > 100 else content}
                        )
                except Exception as exc:
                    # Still best effort, but report the problem instead of
                    # hiding it.
                    print(f"[Twitter] 警告: 初始帖子处理失败 (agent_id={agent_id}): {exc}")

            if initial_actions:
                await env.step(initial_actions)
                print(f"[Twitter] 已发布 {posted_count} 条初始帖子")

        # Main simulation loop.
        time_config = config.get("time_config", {})
        total_hours = time_config.get("total_simulation_hours", 72)
        minutes_per_round = time_config.get("minutes_per_round", 30)
        total_rounds = (total_hours * 60) // minutes_per_round

        start_time = datetime.now()

        for round_num in range(total_rounds):
            simulated_minutes = round_num * minutes_per_round
            simulated_hour = (simulated_minutes // 60) % 24
            simulated_day = simulated_minutes // (60 * 24) + 1

            active_agents = get_active_agents_for_round(
                env, config, simulated_hour, round_num
            )

            # Nothing to do this round; it is skipped without logging.
            if not active_agents:
                continue

            if action_logger:
                action_logger.log_round_start(round_num + 1, simulated_hour, "twitter")

            actions = {agent: LLMAction() for _, agent in active_agents}
            await env.step(actions)

            # Count the actions regardless of whether they are logged.
            total_actions += len(active_agents)

            if action_logger:
                for agent_id, _agent in active_agents:
                    action_logger.log_action(
                        round_num=round_num + 1,
                        platform="twitter",
                        agent_id=agent_id,
                        agent_name=agent_names.get(agent_id, f"Agent_{agent_id}"),
                        action_type="LLM_ACTION",
                        action_args={}
                    )
                action_logger.log_round_end(round_num + 1, len(active_agents), "twitter")

            if (round_num + 1) % 20 == 0:
                progress = (round_num + 1) / total_rounds * 100
                print(f"[Twitter] Day {simulated_day}, {simulated_hour:02d}:00 "
                      f"- Round {round_num + 1}/{total_rounds} ({progress:.1f}%)")
    finally:
        # Close the environment even when a step fails part-way through.
        await env.close()

    if action_logger:
        action_logger.log_simulation_end("twitter", total_rounds, total_actions)

    elapsed = (datetime.now() - start_time).total_seconds()
    print(f"[Twitter] 模拟完成! 耗时: {elapsed:.1f}秒, 总动作: {total_actions}")


async def run_reddit_simulation(
    config: Dict[str, Any],
    simulation_dir: str,
    action_logger: Optional[ActionLogger] = None
):
    """
    Run the Reddit side of the simulation.

    Builds the agent graph from ``reddit_profiles.json`` in *simulation_dir*,
    recreates ``reddit_simulation.db`` there, publishes the configured
    initial posts, and then steps the OASIS environment once per simulated
    round with an ``LLMAction`` for every agent activated in that round.
    The environment is closed even if a step raises.

    Args:
        config: Parsed simulation configuration. ``event_config.initial_posts``
            and ``time_config`` are read here; the dict is also handed to
            ``create_model`` and ``get_active_agents_for_round``.
        simulation_dir: Directory that holds the profile file and receives
            the database file.
        action_logger: Optional logger for simulation, round and action
            events. Actions are counted for the final summary whether or not
            a logger is supplied.

    Returns:
        None. Returns early, after printing an error, when the profile file
        does not exist.
    """
    print("[Reddit] 初始化...")

    model = create_model(config)

    profile_path = os.path.join(simulation_dir, "reddit_profiles.json")
    if not os.path.exists(profile_path):
        print(f"[Reddit] 错误: Profile文件不存在: {profile_path}")
        return

    agent_graph = await generate_reddit_agent_graph(
        profile_path=profile_path,
        model=model,
        available_actions=REDDIT_ACTIONS,
    )

    # Display names for the action log; fall back to "Agent_<id>".
    agent_names = {}
    for agent_id, agent in agent_graph.get_agents():
        agent_names[agent_id] = getattr(agent, 'name', f'Agent_{agent_id}')

    # Start every run from a fresh database file.
    db_path = os.path.join(simulation_dir, "reddit_simulation.db")
    if os.path.exists(db_path):
        os.remove(db_path)

    env = oasis.make(
        agent_graph=agent_graph,
        platform=oasis.DefaultPlatformType.REDDIT,
        database_path=db_path,
    )

    await env.reset()
    print("[Reddit] 环境已启动")

    total_actions = 0

    try:
        if action_logger:
            action_logger.log_simulation_start("reddit", config)

        # Publish the initial posts.
        event_config = config.get("event_config", {})
        initial_posts = event_config.get("initial_posts", [])

        if initial_posts:
            initial_actions = {}
            posted_count = 0
            for post in initial_posts:
                agent_id = post.get("poster_agent_id", 0)
                content = post.get("content", "")
                try:
                    agent = env.agent_graph.get_agent(agent_id)
                    new_action = ManualAction(
                        action_type=ActionType.CREATE_POST,
                        action_args={"content": content}
                    )
                    # An agent may author several initial posts: keep a single
                    # action as-is and promote the entry to a list on the
                    # second post.
                    if agent in initial_actions:
                        if not isinstance(initial_actions[agent], list):
                            initial_actions[agent] = [initial_actions[agent]]
                        initial_actions[agent].append(new_action)
                    else:
                        initial_actions[agent] = new_action
                    posted_count += 1
                    total_actions += 1

                    if action_logger:
                        action_logger.log_action(
                            round_num=0,
                            platform="reddit",
                            agent_id=agent_id,
                            agent_name=agent_names.get(agent_id, f"Agent_{agent_id}"),
                            action_type="CREATE_POST",
                            action_args={"content": content[:100] + "..." if len(content) > 100 else content}
                        )
                except Exception as exc:
                    # Still best effort, but report the problem instead of
                    # hiding it.
                    print(f"[Reddit] 警告: 初始帖子处理失败 (agent_id={agent_id}): {exc}")

            if initial_actions:
                await env.step(initial_actions)
                # Report posts, not dictionary entries: one agent can own
                # several posts.
                print(f"[Reddit] 已发布 {posted_count} 条初始帖子")

        # Main simulation loop.
        time_config = config.get("time_config", {})
        total_hours = time_config.get("total_simulation_hours", 72)
        minutes_per_round = time_config.get("minutes_per_round", 30)
        total_rounds = (total_hours * 60) // minutes_per_round

        start_time = datetime.now()

        for round_num in range(total_rounds):
            simulated_minutes = round_num * minutes_per_round
            simulated_hour = (simulated_minutes // 60) % 24
            simulated_day = simulated_minutes // (60 * 24) + 1

            active_agents = get_active_agents_for_round(
                env, config, simulated_hour, round_num
            )

            # Nothing to do this round; it is skipped without logging.
            if not active_agents:
                continue

            if action_logger:
                action_logger.log_round_start(round_num + 1, simulated_hour, "reddit")

            actions = {agent: LLMAction() for _, agent in active_agents}
            await env.step(actions)

            # Count the actions regardless of whether they are logged.
            total_actions += len(active_agents)

            if action_logger:
                for agent_id, _agent in active_agents:
                    action_logger.log_action(
                        round_num=round_num + 1,
                        platform="reddit",
                        agent_id=agent_id,
                        agent_name=agent_names.get(agent_id, f"Agent_{agent_id}"),
                        action_type="LLM_ACTION",
                        action_args={}
                    )
                action_logger.log_round_end(round_num + 1, len(active_agents), "reddit")

            if (round_num + 1) % 20 == 0:
                progress = (round_num + 1) / total_rounds * 100
                print(f"[Reddit] Day {simulated_day}, {simulated_hour:02d}:00 "
                      f"- Round {round_num + 1}/{total_rounds} ({progress:.1f}%)")
    finally:
        # Close the environment even when a step fails part-way through.
        await env.close()

    if action_logger:
        action_logger.log_simulation_end("reddit", total_rounds, total_actions)

    elapsed = (datetime.now() - start_time).total_seconds()
    print(f"[Reddit] 模拟完成! 耗时: {elapsed:.1f}秒, 总动作: {total_actions}")


async def main():
    """
    Command-line entry point.

    Parses the arguments, loads the configuration, sets up logging in the
    directory of the configuration file, and runs the Twitter simulation,
    the Reddit simulation, or both concurrently, sharing one ActionLogger.
    Exits with status 1 when the configuration file does not exist.
    """
    parser = argparse.ArgumentParser(description='OASIS双平台并行模拟')
    parser.add_argument(
        '--config', type=str, required=True,
        help='配置文件路径 (simulation_config.json)',
    )
    parser.add_argument('--twitter-only', action='store_true', help='只运行Twitter模拟')
    parser.add_argument('--reddit-only', action='store_true', help='只运行Reddit模拟')
    parser.add_argument(
        '--action-log', type=str, default='actions.jsonl',
        help='动作日志文件路径 (默认: actions.jsonl)',
    )
    args = parser.parse_args()

    if not os.path.exists(args.config):
        print(f"错误: 配置文件不存在: {args.config}")
        sys.exit(1)

    config = load_config(args.config)
    # A bare file name has no directory part; fall back to the current directory.
    simulation_dir = os.path.dirname(args.config) or "."

    # Logging setup: removes old log files and uses fixed file names.
    init_logging_for_simulation(simulation_dir)

    # Action logger shared by every simulation started below.
    action_log_path = os.path.join(simulation_dir, args.action_log)
    action_logger = ActionLogger(action_log_path)

    banner = "=" * 60
    print(banner)
    print("OASIS 双平台并行模拟")
    print(f"配置文件: {args.config}")
    print(f"模拟ID: {config.get('simulation_id', 'unknown')}")
    print(f"动作日志: {action_log_path}")
    print(banner)

    time_config = config.get("time_config", {})
    print("\n模拟参数:")
    print(f"  - 总模拟时长: {time_config.get('total_simulation_hours', 72)}小时")
    print(f"  - 每轮时间: {time_config.get('minutes_per_round', 30)}分钟")
    print(f"  - Agent数量: {len(config.get('agent_configs', []))}")

    # Optional explanation stored with the configuration; shown truncated
    # to 500 characters.
    reasoning = config.get("generation_reasoning", "")
    if reasoning:
        print("\nLLM配置推理:")
        if len(reasoning) > 500:
            print(f"  {reasoning[:500]}...")
        else:
            print(f"  {reasoning}")

    print("\n" + banner)

    start_time = datetime.now()

    if args.twitter_only or args.reddit_only:
        # --twitter-only takes precedence when both flags are given.
        single_run = run_twitter_simulation if args.twitter_only else run_reddit_simulation
        await single_run(config, simulation_dir, action_logger)
    else:
        # Run both platforms concurrently with the shared action_logger.
        await asyncio.gather(
            run_twitter_simulation(config, simulation_dir, action_logger),
            run_reddit_simulation(config, simulation_dir, action_logger),
        )

    total_elapsed = (datetime.now() - start_time).total_seconds()
    print("\n" + banner)
    print(f"全部模拟完成! 总耗时: {total_elapsed:.1f}秒")
    print(f"动作日志已保存到: {action_log_path}")
    print(banner)


# Run the async main() only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())