624 lines
18 KiB
JSON
624 lines
18 KiB
JSON
{
|
|
"cycles": [
|
|
{
|
|
"cycle_id": "8fcec95826e7",
|
|
"phase": "baseline",
|
|
"timestamp": 1772475644.661336,
|
|
"results": [
|
|
{
|
|
"case_id": "SKY/sunset-2007",
|
|
"domain": "SKY",
|
|
"scenario": "sunset-2007",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772475644.660887
|
|
},
|
|
{
|
|
"case_id": "SKY/sunset-dramatic",
|
|
"domain": "SKY",
|
|
"scenario": "sunset-dramatic",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772475644.6611881
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "8fcec95826e7",
|
|
"phase": "validation",
|
|
"timestamp": 1772475644.989441,
|
|
"results": [
|
|
{
|
|
"case_id": "SKY/sunset-2007",
|
|
"domain": "SKY",
|
|
"scenario": "sunset-2007",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772475644.9890392
|
|
},
|
|
{
|
|
"case_id": "SKY/sunset-dramatic",
|
|
"domain": "SKY",
|
|
"scenario": "sunset-dramatic",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772475644.989145
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "75a5e53edff7",
|
|
"phase": "baseline",
|
|
"timestamp": 1772478861.780673,
|
|
"results": [
|
|
{
|
|
"case_id": "SKY/san-diego-sunset",
|
|
"domain": "SKY",
|
|
"scenario": "san-diego-sunset",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772478861.780382
|
|
},
|
|
{
|
|
"case_id": "SKY/city-sunrise",
|
|
"domain": "SKY",
|
|
"scenario": "city-sunrise",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772478861.780554
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "75a5e53edff7",
|
|
"phase": "validation",
|
|
"timestamp": 1772478862.082939,
|
|
"results": [
|
|
{
|
|
"case_id": "SKY/san-diego-sunset",
|
|
"domain": "SKY",
|
|
"scenario": "san-diego-sunset",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772478862.082635
|
|
},
|
|
{
|
|
"case_id": "SKY/city-sunrise",
|
|
"domain": "SKY",
|
|
"scenario": "city-sunrise",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1772478862.0827289
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "01aabcdb1b47",
|
|
"phase": "baseline",
|
|
"timestamp": 1772480237.391291,
|
|
"results": [
|
|
{
|
|
"case_id": "SKY/sunset-dramatic",
|
|
"domain": "SKY",
|
|
"scenario": "sunset-dramatic",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480166.56554
|
|
},
|
|
{
|
|
"case_id": "WEATHER/fog",
|
|
"domain": "WEATHER",
|
|
"scenario": "fog",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480237.391087
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "01aabcdb1b47",
|
|
"phase": "validation",
|
|
"timestamp": 1772480400.762218,
|
|
"results": [
|
|
{
|
|
"case_id": "SKY/sunset-dramatic",
|
|
"domain": "SKY",
|
|
"scenario": "sunset-dramatic",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480329.305185
|
|
},
|
|
{
|
|
"case_id": "WEATHER/fog",
|
|
"domain": "WEATHER",
|
|
"scenario": "fog",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480400.761955
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "d627056eafd3",
|
|
"phase": "baseline",
|
|
"timestamp": 1772480544.826165,
|
|
"results": [
|
|
{
|
|
"case_id": "WEATHER/fog",
|
|
"domain": "WEATHER",
|
|
"scenario": "fog",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480469.0631418
|
|
},
|
|
{
|
|
"case_id": "SEA/ocean-horizon",
|
|
"domain": "SEA",
|
|
"scenario": "ocean-horizon",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480544.825933
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "d627056eafd3",
|
|
"phase": "validation",
|
|
"timestamp": 1772480736.6663759,
|
|
"results": [
|
|
{
|
|
"case_id": "WEATHER/fog",
|
|
"domain": "WEATHER",
|
|
"scenario": "fog",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480635.257945
|
|
},
|
|
{
|
|
"case_id": "SEA/ocean-horizon",
|
|
"domain": "SEA",
|
|
"scenario": "ocean-horizon",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480736.6660538
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "17dfd9cd5ba3",
|
|
"phase": "baseline",
|
|
"timestamp": 1772480920.3345332,
|
|
"results": [
|
|
{
|
|
"case_id": "SEA/ocean-horizon",
|
|
"domain": "SEA",
|
|
"scenario": "ocean-horizon",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480840.287804
|
|
},
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772480920.3341491
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "17dfd9cd5ba3",
|
|
"phase": "validation",
|
|
"timestamp": 1772481073.129599,
|
|
"results": [
|
|
{
|
|
"case_id": "SEA/ocean-horizon",
|
|
"domain": "SEA",
|
|
"scenario": "ocean-horizon",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772481011.014934
|
|
},
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772481073.1293418
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "61aa553c22f8",
|
|
"phase": "baseline",
|
|
"timestamp": 1772481221.340435,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772481146.4434931
|
|
},
|
|
{
|
|
"case_id": "SKY/fiery-silhouette",
|
|
"domain": "SKY",
|
|
"scenario": "fiery-silhouette",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772481221.340192
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "61aa553c22f8",
|
|
"phase": "validation",
|
|
"timestamp": 1772481416.291574,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772481335.9913118
|
|
},
|
|
{
|
|
"case_id": "SKY/fiery-silhouette",
|
|
"domain": "SKY",
|
|
"scenario": "fiery-silhouette",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772481416.291146
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "3b2642293ca6",
|
|
"phase": "baseline",
|
|
"timestamp": 1772481810.530186,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Orchestrator failed (no silent fallback): asyncio.run() cannot be called from a running event loop",
|
|
"timestamp": 1772481532.509093
|
|
},
|
|
{
|
|
"case_id": "SEA/ocean-horizon",
|
|
"domain": "SEA",
|
|
"scenario": "ocean-horizon",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "Screenshot not found: /Users/jamestagg/Documents/GitHub/ralpha/sessions/runs/ralph_20260302_115852/screenshots/iter_000.png",
|
|
"timestamp": 1772481810.5300689
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "3b2642293ca6",
|
|
"phase": "validation",
|
|
"timestamp": 1772481827.2748742,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "MCP server not available",
|
|
"timestamp": 1772481827.270559
|
|
},
|
|
{
|
|
"case_id": "SEA/ocean-horizon",
|
|
"domain": "SEA",
|
|
"scenario": "ocean-horizon",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "MCP server not available",
|
|
"timestamp": 1772481827.274692
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "0c9ed13dd929",
|
|
"phase": "baseline",
|
|
"timestamp": 1772481827.373427,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "MCP server not available",
|
|
"timestamp": 1772481827.369146
|
|
},
|
|
{
|
|
"case_id": "SKY/tappan-zee-sunset",
|
|
"domain": "SKY",
|
|
"scenario": "tappan-zee-sunset",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "MCP server not available",
|
|
"timestamp": 1772481827.373281
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "0c9ed13dd929",
|
|
"phase": "validation",
|
|
"timestamp": 1772481854.325294,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "MCP server not available",
|
|
"timestamp": 1772481854.319867
|
|
},
|
|
{
|
|
"case_id": "SKY/tappan-zee-sunset",
|
|
"domain": "SKY",
|
|
"scenario": "tappan-zee-sunset",
|
|
"best_score": 0.0,
|
|
"iterations": 0,
|
|
"elapsed_seconds": 0.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "",
|
|
"error": "MCP server not available",
|
|
"timestamp": 1772481854.324975
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "ef992b45386f",
|
|
"phase": "baseline",
|
|
"timestamp": 1774626358.072004,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1774626358.071899
|
|
},
|
|
{
|
|
"case_id": "SKY/fiery-silhouette",
|
|
"domain": "SKY",
|
|
"scenario": "fiery-silhouette",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1774626358.071926
|
|
},
|
|
{
|
|
"case_id": "PORTRAIT/jamest-standing",
|
|
"domain": "PORTRAIT",
|
|
"scenario": "jamest-standing",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1774626358.071948
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cycle_id": "ef992b45386f",
|
|
"phase": "validation",
|
|
"timestamp": 1774626358.159755,
|
|
"results": [
|
|
{
|
|
"case_id": "FOOD/spaghetti",
|
|
"domain": "FOOD",
|
|
"scenario": "spaghetti",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1774626358.159591
|
|
},
|
|
{
|
|
"case_id": "SKY/fiery-silhouette",
|
|
"domain": "SKY",
|
|
"scenario": "fiery-silhouette",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1774626358.159632
|
|
},
|
|
{
|
|
"case_id": "PORTRAIT/jamest-standing",
|
|
"domain": "PORTRAIT",
|
|
"scenario": "jamest-standing",
|
|
"best_score": 0.42,
|
|
"iterations": 10,
|
|
"elapsed_seconds": 1.0,
|
|
"specialists_used": [],
|
|
"regressions": 0,
|
|
"exit_reason": "dry_run",
|
|
"error": "",
|
|
"timestamp": 1774626358.159658
|
|
}
|
|
]
|
|
}
|
|
]
|
|
} |