ralpha-assets/benchmark/scores/evolution_history.json

624 lines
18 KiB
JSON

{
"cycles": [
{
"cycle_id": "8fcec95826e7",
"phase": "baseline",
"timestamp": 1772475644.661336,
"results": [
{
"case_id": "SKY/sunset-2007",
"domain": "SKY",
"scenario": "sunset-2007",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772475644.660887
},
{
"case_id": "SKY/sunset-dramatic",
"domain": "SKY",
"scenario": "sunset-dramatic",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772475644.6611881
}
]
},
{
"cycle_id": "8fcec95826e7",
"phase": "validation",
"timestamp": 1772475644.989441,
"results": [
{
"case_id": "SKY/sunset-2007",
"domain": "SKY",
"scenario": "sunset-2007",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772475644.9890392
},
{
"case_id": "SKY/sunset-dramatic",
"domain": "SKY",
"scenario": "sunset-dramatic",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772475644.989145
}
]
},
{
"cycle_id": "75a5e53edff7",
"phase": "baseline",
"timestamp": 1772478861.780673,
"results": [
{
"case_id": "SKY/san-diego-sunset",
"domain": "SKY",
"scenario": "san-diego-sunset",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772478861.780382
},
{
"case_id": "SKY/city-sunrise",
"domain": "SKY",
"scenario": "city-sunrise",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772478861.780554
}
]
},
{
"cycle_id": "75a5e53edff7",
"phase": "validation",
"timestamp": 1772478862.082939,
"results": [
{
"case_id": "SKY/san-diego-sunset",
"domain": "SKY",
"scenario": "san-diego-sunset",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772478862.082635
},
{
"case_id": "SKY/city-sunrise",
"domain": "SKY",
"scenario": "city-sunrise",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1772478862.0827289
}
]
},
{
"cycle_id": "01aabcdb1b47",
"phase": "baseline",
"timestamp": 1772480237.391291,
"results": [
{
"case_id": "SKY/sunset-dramatic",
"domain": "SKY",
"scenario": "sunset-dramatic",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480166.56554
},
{
"case_id": "WEATHER/fog",
"domain": "WEATHER",
"scenario": "fog",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480237.391087
}
]
},
{
"cycle_id": "01aabcdb1b47",
"phase": "validation",
"timestamp": 1772480400.762218,
"results": [
{
"case_id": "SKY/sunset-dramatic",
"domain": "SKY",
"scenario": "sunset-dramatic",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480329.305185
},
{
"case_id": "WEATHER/fog",
"domain": "WEATHER",
"scenario": "fog",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480400.761955
}
]
},
{
"cycle_id": "d627056eafd3",
"phase": "baseline",
"timestamp": 1772480544.826165,
"results": [
{
"case_id": "WEATHER/fog",
"domain": "WEATHER",
"scenario": "fog",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480469.0631418
},
{
"case_id": "SEA/ocean-horizon",
"domain": "SEA",
"scenario": "ocean-horizon",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480544.825933
}
]
},
{
"cycle_id": "d627056eafd3",
"phase": "validation",
"timestamp": 1772480736.6663759,
"results": [
{
"case_id": "WEATHER/fog",
"domain": "WEATHER",
"scenario": "fog",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480635.257945
},
{
"case_id": "SEA/ocean-horizon",
"domain": "SEA",
"scenario": "ocean-horizon",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480736.6660538
}
]
},
{
"cycle_id": "17dfd9cd5ba3",
"phase": "baseline",
"timestamp": 1772480920.3345332,
"results": [
{
"case_id": "SEA/ocean-horizon",
"domain": "SEA",
"scenario": "ocean-horizon",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480840.287804
},
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772480920.3341491
}
]
},
{
"cycle_id": "17dfd9cd5ba3",
"phase": "validation",
"timestamp": 1772481073.129599,
"results": [
{
"case_id": "SEA/ocean-horizon",
"domain": "SEA",
"scenario": "ocean-horizon",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772481011.014934
},
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772481073.1293418
}
]
},
{
"cycle_id": "61aa553c22f8",
"phase": "baseline",
"timestamp": 1772481221.340435,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772481146.4434931
},
{
"case_id": "SKY/fiery-silhouette",
"domain": "SKY",
"scenario": "fiery-silhouette",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772481221.340192
}
]
},
{
"cycle_id": "61aa553c22f8",
"phase": "validation",
"timestamp": 1772481416.291574,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772481335.9913118
},
{
"case_id": "SKY/fiery-silhouette",
"domain": "SKY",
"scenario": "fiery-silhouette",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772481416.291146
}
]
},
{
"cycle_id": "3b2642293ca6",
"phase": "baseline",
"timestamp": 1772481810.530186,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
"timestamp": 1772481532.509093
},
{
"case_id": "SEA/ocean-horizon",
"domain": "SEA",
"scenario": "ocean-horizon",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "Screenshot not found: /Users/jamestagg/Documents/GitHub/ralpha/sessions/runs/ralph_20260302_115852/screenshots/iter_000.png",
"timestamp": 1772481810.5300689
}
]
},
{
"cycle_id": "3b2642293ca6",
"phase": "validation",
"timestamp": 1772481827.2748742,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "MCP server not available",
"timestamp": 1772481827.270559
},
{
"case_id": "SEA/ocean-horizon",
"domain": "SEA",
"scenario": "ocean-horizon",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "MCP server not available",
"timestamp": 1772481827.274692
}
]
},
{
"cycle_id": "0c9ed13dd929",
"phase": "baseline",
"timestamp": 1772481827.373427,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "MCP server not available",
"timestamp": 1772481827.369146
},
{
"case_id": "SKY/tappan-zee-sunset",
"domain": "SKY",
"scenario": "tappan-zee-sunset",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "MCP server not available",
"timestamp": 1772481827.373281
}
]
},
{
"cycle_id": "0c9ed13dd929",
"phase": "validation",
"timestamp": 1772481854.325294,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "MCP server not available",
"timestamp": 1772481854.319867
},
{
"case_id": "SKY/tappan-zee-sunset",
"domain": "SKY",
"scenario": "tappan-zee-sunset",
"best_score": 0.0,
"iterations": 0,
"elapsed_seconds": 0.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "",
"error": "MCP server not available",
"timestamp": 1772481854.324975
}
]
},
{
"cycle_id": "ef992b45386f",
"phase": "baseline",
"timestamp": 1774626358.072004,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1774626358.071899
},
{
"case_id": "SKY/fiery-silhouette",
"domain": "SKY",
"scenario": "fiery-silhouette",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1774626358.071926
},
{
"case_id": "PORTRAIT/jamest-standing",
"domain": "PORTRAIT",
"scenario": "jamest-standing",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1774626358.071948
}
]
},
{
"cycle_id": "ef992b45386f",
"phase": "validation",
"timestamp": 1774626358.159755,
"results": [
{
"case_id": "FOOD/spaghetti",
"domain": "FOOD",
"scenario": "spaghetti",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1774626358.159591
},
{
"case_id": "SKY/fiery-silhouette",
"domain": "SKY",
"scenario": "fiery-silhouette",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1774626358.159632
},
{
"case_id": "PORTRAIT/jamest-standing",
"domain": "PORTRAIT",
"scenario": "jamest-standing",
"best_score": 0.42,
"iterations": 10,
"elapsed_seconds": 1.0,
"specialists_used": [],
"regressions": 0,
"exit_reason": "dry_run",
"error": "",
"timestamp": 1774626358.159658
}
]
}
]
}