Fix corrupted LLM-generated hour arrays in simulation config

The LLM sometimes generates arrays like [19202122] instead of
[19,20,21,22]. Add _sanitize_hours() to validate and fix these,
falling back to defaults when arrays contain single large numbers
or strings. Also add round-level debug logging.
This commit is contained in:
_Yusaki 2026-03-13 20:32:33 +07:00
parent 0ff30457a0
commit 5e206bdd84
2 changed files with 23 additions and 9 deletions

View file

@ -24,6 +24,19 @@ from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.simulation_config')
def _sanitize_hours(val, default):
"""Fix LLM-generated hour arrays that got concatenated into single values."""
if not isinstance(val, list) or not val:
return default
if len(val) == 1:
item = val[0]
if isinstance(item, str) and len(item) > 2:
return default
if isinstance(item, (int, float)) and item > 23:
return default
return [h for h in val if isinstance(h, int) and 0 <= h <= 23] or default
# 中国作息时间配置(北京时间)
CHINA_TIMEZONE_CONFIG = {
# 深夜时段(几乎无人活动)
@ -631,12 +644,12 @@ Field descriptions:
minutes_per_round=result.get("minutes_per_round", 60), # 默认每轮1小时
agents_per_hour_min=agents_per_hour_min,
agents_per_hour_max=agents_per_hour_max,
peak_hours=result.get("peak_hours", [19, 20, 21, 22]),
off_peak_hours=result.get("off_peak_hours", [0, 1, 2, 3, 4, 5]),
peak_hours=_sanitize_hours(result.get("peak_hours"), [19, 20, 21, 22]),
off_peak_hours=_sanitize_hours(result.get("off_peak_hours"), [0, 1, 2, 3, 4, 5]),
off_peak_activity_multiplier=0.05, # 凌晨几乎无人
morning_hours=result.get("morning_hours", [6, 7, 8]),
morning_hours=_sanitize_hours(result.get("morning_hours"), [6, 7, 8]),
morning_activity_multiplier=0.4,
work_hours=result.get("work_hours", list(range(9, 19))),
work_hours=_sanitize_hours(result.get("work_hours"), list(range(9, 19))),
work_activity_multiplier=0.7,
peak_activity_multiplier=1.5
)
@ -890,7 +903,7 @@ Return JSON format (no markdown):
activity_level=cfg.get("activity_level", 0.5),
posts_per_hour=cfg.get("posts_per_hour", 0.5),
comments_per_hour=cfg.get("comments_per_hour", 1.0),
active_hours=cfg.get("active_hours", list(range(9, 23))),
active_hours=_sanitize_hours(cfg.get("active_hours"), list(range(9, 23))),
response_delay_min=cfg.get("response_delay_min", 5),
response_delay_max=cfg.get("response_delay_max", 60),
sentiment_bias=cfg.get("sentiment_bias", 0.0),

View file

@ -620,7 +620,7 @@ class RedditSimulationRunner:
print(f" 已发布 {len(initial_actions)} 条初始帖子")
# 主模拟循环
print("\n开始模拟循环...")
print("\n开始模拟循环...", flush=True)
start_time = datetime.now()
for round_num in range(total_rounds):
@ -632,6 +632,7 @@ class RedditSimulationRunner:
self.env, simulated_hour, round_num
)
print(f" Round {round_num+1}/{total_rounds}: hour={simulated_hour}, active={len(active_agents)}", flush=True)
if not active_agents:
continue