2cf728210e
play.html now loads solitaire_wasm.js alongside the Bevy canvas and
exposes the same window.__FERROUS_DEBUG__ object as /play-classic.
The bridge runs an independent SolitaireGame (WASM logic layer) seeded
from ?seed= / ?draw3= URL params; Bevy renders the visual game in
parallel without coupling.
Methods exposed: seed, state, legalMoves, moveHistory, snapshot,
applyLegalMove, applyMove, draw, undo, serialize, fromSaved, newGame,
failureReport, replayPayload, runAutoplay — matching the /play-classic
contract so the shared Playwright harness targets either route without
modification.
cycle_metrics.js: add --route play-classic|play flag (default
play-classic). Routes to /${route}?seed=N. The resume-overlay clear
step is skipped for /play since the Bevy build uses localStorage-backed
WasmStorage, not a #resume-overlay element.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
383 lines
15 KiB
JavaScript
383 lines
15 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
const fs = require("fs");
|
|
const path = require("path");
|
|
const { spawn } = require("child_process");
|
|
const { chromium } = require("playwright");
|
|
|
|
// Port the locally spawned solitaire_server is told to listen on (via SERVER_PORT).
const serverPort = 4173;

// Origin used for the health probe and for every page navigation.
const baseUrl = `http://127.0.0.1:${serverPort}`;

// Working directory for `cargo run`: three directories above this script.
const repoRoot = path.resolve(__dirname, "../../..");
|
|
|
|
/**
 * Look up the value supplied for a command-line flag.
 *
 * Two spellings are recognised: `--flag value` (checked first) and
 * `--flag=value`.
 *
 * @param {string} name - Flag to search for in `process.argv`, e.g. "--games".
 * @param {*} fallback - Returned when the flag is absent or has no value.
 * @returns {*} The flag's value as a string, or `fallback`.
 */
function readArg(name, fallback) {
  const idx = process.argv.indexOf(name);
  if (idx >= 0 && idx + 1 < process.argv.length) return process.argv[idx + 1];

  // Inline form: `--flag=value`.
  const inlinePrefix = `${name}=`;
  const inline = process.argv.find((arg) => arg.startsWith(inlinePrefix));
  return inline === undefined ? fallback : inline.slice(inlinePrefix.length);
}
|
|
|
|
/**
 * Read a flag's value as a strictly positive base-10 integer.
 *
 * Parsing uses `Number.parseInt`, so trailing non-digit characters are
 * tolerated ("12abc" parses as 12).
 *
 * @param {string} name - Flag to read, e.g. "--games".
 * @param {number} fallback - Returned when the flag is missing, unparsable, or <= 0.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveInt(name, fallback) {
  const value = Number.parseInt(readArg(name, String(fallback)), 10);
  if (Number.isNaN(value) || value <= 0) return fallback;
  return value;
}
|
|
|
|
/**
 * Read an optional numeric flag (used for the regression thresholds).
 *
 * @param {string} name - Flag to read, e.g. "--max-cycle-rate-all".
 * @returns {number|null} The parsed finite number, or `null` when the flag is
 *   absent or its value is not a finite number.
 */
function parseOptionalNumber(name) {
  const raw = readArg(name, null);
  if (raw === null) return null;

  const value = Number.parseFloat(raw);
  if (Number.isFinite(value)) return value;

  // A `null` threshold means "do not check", so a typo would otherwise switch
  // the corresponding regression gate off without any trace.
  console.error(`ignoring ${name}: "${raw}" is not a finite number`);
  return null;
}
|
|
|
|
/**
 * Report whether a bare flag appears anywhere in `process.argv`.
 *
 * @param {string} name - Exact argument to look for, e.g. "--require-zero-issues".
 * @returns {boolean} `true` when an argument equal to `name` is present.
 */
function hasFlag(name) {
  return process.argv.includes(name);
}
|
|
|
|
/**
 * Aggregate per-game results into headline counters.
 *
 * @param {Array<object>} results - One record per game; each is read for
 *   `terminal`, `issues` (array), `page_errors`, `console_errors` and `draw3`.
 * @returns {object} Counts of games per terminal state, games with issues or
 *   browser errors, and games per draw mode.
 */
function buildSummary(results) {
  const count = (predicate) => results.filter(predicate).length;
  const countTerminal = (terminal) => count((r) => r.terminal === terminal);

  return {
    games_played: results.length,
    won: countTerminal("won"),
    no_moves: countTerminal("no_moves"),
    cycle: countTerminal("cycle"),
    step_budget: countTerminal("step_budget"),
    apply_failed: countTerminal("apply_failed"),
    invariant_failed: countTerminal("invariant_failed"),
    missing_snapshot: countTerminal("missing_snapshot"),
    games_with_issues: count((r) => r.issues.length > 0),
    with_page_errors: count((r) => r.page_errors > 0),
    with_console_errors: count((r) => r.console_errors > 0),
    draw1: count((r) => !r.draw3),
    draw3: count((r) => r.draw3),
  };
}
|
|
|
|
/**
 * Compute win / cycle / step-budget counts and percentages for a subset of games.
 *
 * @param {Array<object>} results - Per-game records (read for `terminal`).
 * @param {(r: object) => boolean} predicate - Selects the games to include.
 * @returns {object} Subset size, the three counts, and each count as a
 *   percentage of the subset rounded to one decimal place (0 for an empty subset).
 */
function withRates(results, predicate) {
  const subset = results.filter(predicate);
  const countTerminal = (terminal) => subset.filter((r) => r.terminal === terminal).length;
  // An empty subset reports 0 rather than dividing by zero.
  const toPct = (n) => (subset.length === 0 ? 0 : Number(((n / subset.length) * 100).toFixed(1)));

  const won = countTerminal("won");
  const cycle = countTerminal("cycle");
  const stepBudget = countTerminal("step_budget");

  return {
    games: subset.length,
    won,
    win_rate_pct: toPct(won),
    cycle,
    cycle_rate_pct: toPct(cycle),
    step_budget: stepBudget,
    step_budget_rate_pct: toPct(stepBudget),
  };
}
|
|
|
|
/**
 * Poll the server's `/health` endpoint until it answers with an OK status.
 *
 * @param {number} [timeoutMs=120000] - Overall polling deadline in milliseconds.
 * @returns {Promise<boolean>} `true` on the first OK response, `false` once
 *   the deadline has passed without one.
 */
async function waitForHealth(timeoutMs = 120_000) {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    // Bound each probe so a connection that is accepted but never answered
    // cannot stall the loop past the overall deadline.
    const remainingMs = Math.max(1, timeoutMs - (Date.now() - start));
    try {
      const res = await fetch(`${baseUrl}/health`, {
        signal: AbortSignal.timeout(Math.min(remainingMs, 5_000)),
      });
      if (res.ok) return true;
    } catch {
      // server not ready yet (connection refused, or the probe timed out)
    }
    await new Promise((resolve) => setTimeout(resolve, 400));
  }
  return false;
}
|
|
|
|
/**
 * Launch `solitaire_server` through `cargo run` from the repository root.
 *
 * The child inherits the current environment plus fixed end-to-end settings
 * (offline sqlx, a dedicated sqlite database URL, a test JWT secret and the
 * port this script probes). stdin is ignored; stdout and stderr are piped so
 * the caller can capture them.
 *
 * @returns {import("child_process").ChildProcess} The spawned process handle.
 */
function startServer() {
  return spawn("cargo", ["run", "-p", "solitaire_server", "--quiet"], {
    cwd: repoRoot,
    env: {
      ...process.env,
      SQLX_OFFLINE: "true",
      DATABASE_URL: "sqlite://ferrous-solitaire-e2e.db",
      JWT_SECRET: "ferrous_solitaire_e2e_secret_32_chars_long",
      SERVER_PORT: String(serverPort),
    },
    stdio: ["ignore", "pipe", "pipe"],
  });
}
|
|
|
|
/**
 * Load one seeded game in the shared page, run the in-page autoplay, and
 * return its flattened outcome together with the browser errors seen.
 *
 * @param {object} page - Playwright page, reused across games.
 * @param {object} options
 * @param {number} options.seed - Value sent as the `seed` query parameter.
 * @param {boolean} options.draw3 - Whether to append the `draw3` query parameter.
 * @param {string} options.route - "play-classic" or "play".
 * @param {number} options.maxSteps - Step cap handed to `runAutoplay`.
 * @param {number} options.maxVisitsPerState - Visit cap handed to `runAutoplay`.
 * @param {string} options.policy - Policy name handed to `runAutoplay`.
 * @returns {Promise<{run: object, pageErrors: string[], consoleErrors: string[]}>}
 */
async function playGame(page, { seed, draw3, route, maxSteps, maxVisitsPerState, policy }) {
  // NOTE(review): draw-3 is requested with an empty value; presumably the page
  // only checks that the parameter is present — confirm against the route.
  const suffix = draw3 ? "&draw3=" : "";

  const pageErrors = [];
  const consoleErrors = [];
  // The page is reused, so drop the previous game's listeners before adding new ones.
  page.removeAllListeners("pageerror");
  page.removeAllListeners("console");
  page.on("pageerror", (err) => pageErrors.push(String(err)));
  page.on("console", (msg) => {
    if (msg.type() === "error") {
      consoleErrors.push(msg.text());
    }
  });

  await page.goto(`${baseUrl}/${route}?seed=${seed}${suffix}`, {
    waitUntil: "domcontentloaded",
  });

  // Only the classic route is checked for a #resume-overlay; when it is
  // showing, the saved game is cleared and the page reloaded.
  if (route === "play-classic") {
    const resumeVisible = await page
      .locator("#resume-overlay:not(.hidden)")
      .isVisible()
      .catch(() => false);
    if (resumeVisible) {
      await page.evaluate(() => localStorage.removeItem("fs_game_save"));
      await page.reload({ waitUntil: "domcontentloaded" });
    }
  }

  await page.waitForFunction(
    () =>
      typeof window.__FERROUS_DEBUG__ === "object" &&
      window.__FERROUS_DEBUG__.seed() !== null,
    null,
    { timeout: 30_000 }
  );

  // This callback is serialized and executed inside the page, so it may only
  // use its own argument and page globals.
  const run = await page.evaluate(
    ({ stepCap, policyName, maxVisits }) => {
      const debug = window.__FERROUS_DEBUG__;
      const autoplay = debug.runAutoplay({
        maxSteps: stepCap,
        maxVisitsPerState: maxVisits,
        policy: policyName,
      });
      const snap = autoplay.snapshot || debug.snapshot();
      const payload = debug.replayPayload();

      return {
        terminal: autoplay.terminal || "unknown",
        step: autoplay.step ?? -1,
        error: autoplay.error || autoplay.reason || null,
        ok: !!autoplay.ok,
        score: snap?.state?.score ?? null,
        move_count: snap?.state?.move_count ?? null,
        invariant_ok: !!snap?.invariants?.state_ok,
        history_len: Array.isArray(snap?.move_history) ? snap.move_history.length : null,
        replay_payload_present: payload !== null,
        replay_moves_len: Array.isArray(payload?.moves) ? payload.moves.length : 0,
      };
    },
    { stepCap: maxSteps, policyName: policy, maxVisits: maxVisitsPerState }
  );

  return { run, pageErrors, consoleErrors };
}

/**
 * Turn one game's outcome and captured browser errors into a list of issue tags.
 *
 * @param {object} run - Flattened autoplay outcome produced by `playGame`.
 * @param {string[]} pageErrors - Uncaught page errors seen during the game.
 * @param {string[]} consoleErrors - `console.error` messages seen during the game.
 * @returns {string[]} Issue tags; empty when nothing was flagged.
 */
function collectIssues(run, pageErrors, consoleErrors) {
  const issues = [];
  if (!run.ok) issues.push(`autoplay_failed:${run.error || run.terminal || "unknown"}`);
  if (!run.invariant_ok) issues.push("invariant_not_ok");
  if (run.terminal === "invariant_failed") issues.push("invariant_failed_terminal");
  if (run.terminal === "missing_snapshot") issues.push("missing_snapshot_terminal");
  if (run.terminal === "apply_failed") issues.push(`apply_failed:${run.error}`);
  if (run.history_len > 0 && !run.replay_payload_present) {
    issues.push("missing_replay_payload_after_moves");
  }
  if (
    run.replay_payload_present &&
    run.history_len !== null &&
    run.replay_moves_len !== run.history_len
  ) {
    issues.push(`replay_history_mismatch:${run.replay_moves_len}/${run.history_len}`);
  }
  if (pageErrors.length > 0) issues.push(`page_errors:${pageErrors.length}`);
  if (consoleErrors.length > 0) issues.push(`console_errors:${consoleErrors.length}`);
  return issues;
}

/**
 * Compare measured rates and counters with the configured thresholds.
 *
 * @param {object} rates - `{ all, draw1, draw3 }`, each shaped like `withRates` output.
 * @param {object} summary - Output of `buildSummary`.
 * @param {object} thresholds - Threshold values keyed as in the report
 *   (`max_cycle_rate_all`, ..., `min_win_rate_all`, `require_zero_issues`);
 *   a `null` numeric threshold is not checked.
 * @returns {string[]} One human-readable message per violated threshold.
 */
function findRegressionFailures(rates, summary, thresholds) {
  const failures = [];
  const checkMax = (limit, group, metric) => {
    const actual = rates[group][metric];
    if (limit !== null && actual > limit) {
      failures.push(`${group}.${metric} ${actual}% > ${limit}%`);
    }
  };

  checkMax(thresholds.max_cycle_rate_all, "all", "cycle_rate_pct");
  checkMax(thresholds.max_cycle_rate_draw1, "draw1", "cycle_rate_pct");
  checkMax(thresholds.max_cycle_rate_draw3, "draw3", "cycle_rate_pct");
  if (thresholds.min_win_rate_all !== null && rates.all.win_rate_pct < thresholds.min_win_rate_all) {
    failures.push(`all.win_rate_pct ${rates.all.win_rate_pct}% < ${thresholds.min_win_rate_all}%`);
  }
  checkMax(thresholds.max_step_budget_rate_all, "all", "step_budget_rate_pct");
  checkMax(thresholds.max_step_budget_rate_draw1, "draw1", "step_budget_rate_pct");
  checkMax(thresholds.max_step_budget_rate_draw3, "draw3", "step_budget_rate_pct");

  if (thresholds.require_zero_issues) {
    const mustBeZero = [
      "games_with_issues",
      "apply_failed",
      "invariant_failed",
      "missing_snapshot",
      "with_page_errors",
      "with_console_errors",
    ];
    for (const key of mustBeZero) {
      if (summary[key] > 0) failures.push(`${key} ${summary[key]} > 0`);
    }
  }
  return failures;
}

/**
 * Entry point: start the server, autoplay `--games` seeded games in headless
 * Chromium, write the metrics report to `--out`, and print it to stdout.
 *
 * Exit status: 2 when a regression threshold is violated, 1 on any error
 * (an error report is written to `--out` instead); otherwise the function
 * returns normally.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const games = parsePositiveInt("--games", 1000);
  const maxSteps = parsePositiveInt("--steps", 350);
  const maxVisitsPerState = parsePositiveInt("--max-visits", 2);
  const policyArg = readArg("--policy", "loop_aware");
  const policy = policyArg === "baseline" ? "baseline" : "loop_aware";
  const outPath = readArg("--out", "/tmp/playwright-cycle-metrics.json");
  // --route play-classic (default) or --route play
  const routeArg = readArg("--route", "play-classic");
  const route = routeArg === "play" ? "play" : "play-classic";
  const thresholds = {
    max_cycle_rate_all: parseOptionalNumber("--max-cycle-rate-all"),
    max_cycle_rate_draw1: parseOptionalNumber("--max-cycle-rate-draw1"),
    max_cycle_rate_draw3: parseOptionalNumber("--max-cycle-rate-draw3"),
    max_step_budget_rate_all: parseOptionalNumber("--max-step-budget-rate-all"),
    max_step_budget_rate_draw1: parseOptionalNumber("--max-step-budget-rate-draw1"),
    max_step_budget_rate_draw3: parseOptionalNumber("--max-step-budget-rate-draw3"),
    min_win_rate_all: parseOptionalNumber("--min-win-rate-all"),
    require_zero_issues: hasFlag("--require-zero-issues"),
  };

  const server = startServer();
  // Only the tail of the server output is ever reported, so keep just that much.
  const serverLogTailChars = 5000;
  let serverLog = "";
  const appendServerLog = (chunk) => {
    serverLog = (serverLog + chunk.toString()).slice(-serverLogTailChars);
  };
  server.stdout.on("data", appendServerLog);
  server.stderr.on("data", appendServerLog);

  // `server.killed` only records that a signal was sent, not that the process
  // has exited, so liveness is derived from exitCode/signalCode instead.
  const serverRunning = () => server.exitCode === null && server.signalCode === null;
  const cleanup = async () => {
    if (!serverRunning()) return;
    server.kill("SIGTERM");
    await new Promise((resolve) => setTimeout(resolve, 500));
    if (serverRunning()) server.kill("SIGKILL");
  };

  let browser = null;
  let exitCode = 0;
  try {
    if (!(await waitForHealth())) {
      throw new Error("server_failed_health_check");
    }

    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext();
    const page = await context.newPage();
    const results = [];

    for (let i = 0; i < games; i++) {
      const seed = i;
      // Odd-numbered games are played in draw-3 mode.
      const draw3 = i % 2 === 1;

      const { run, pageErrors, consoleErrors } = await playGame(page, {
        seed,
        draw3,
        route,
        maxSteps,
        maxVisitsPerState,
        policy,
      });

      results.push({
        game_index: i,
        seed,
        draw3,
        ...run,
        page_errors: pageErrors.length,
        console_errors: consoleErrors.length,
        issues: collectIssues(run, pageErrors, consoleErrors),
      });

      if ((i + 1) % 100 === 0) {
        console.error(`progress ${i + 1}/${games}`);
      }
    }

    const summary = buildSummary(results);
    const rates = {
      all: withRates(results, () => true),
      draw1: withRates(results, (r) => !r.draw3),
      draw3: withRates(results, (r) => r.draw3),
    };
    const regressionFailures = findRegressionFailures(rates, summary, thresholds);

    const out = {
      config: {
        games,
        max_steps: maxSteps,
        policy,
        max_visits_per_state: maxVisitsPerState,
        route,
      },
      summary,
      rates,
      regression: {
        pass: regressionFailures.length === 0,
        thresholds,
        failures: regressionFailures,
      },
      issue_samples: results
        .filter((r) => r.issues.length > 0)
        .slice(0, 25)
        .map((r) => ({
          seed: r.seed,
          draw3: r.draw3,
          terminal: r.terminal,
          issues: r.issues,
          score: r.score,
          move_count: r.move_count,
        })),
      cycle_samples: results
        .filter((r) => r.terminal === "cycle")
        .slice(0, 25)
        .map((r) => ({
          seed: r.seed,
          draw3: r.draw3,
          step: r.step,
          score: r.score,
          move_count: r.move_count,
        })),
    };

    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, JSON.stringify({ out, results }, null, 2));
    console.log(JSON.stringify(out, null, 2));
    if (regressionFailures.length > 0) {
      console.error(`regression check failed: ${regressionFailures.join("; ")}`);
      exitCode = 2;
    }
  } catch (err) {
    const errorOut = {
      error: String(err),
      server_log_tail: serverLog,
    };
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, JSON.stringify(errorOut, null, 2));
    console.error(JSON.stringify(errorOut, null, 2));
    exitCode = 1;
  } finally {
    // Teardown runs on every path. process.exit() is deferred until afterwards
    // because it would terminate the process before `finally` could run.
    if (browser) {
      await browser
        .close()
        .catch((closeErr) => console.error(`browser_close_failed: ${closeErr}`));
    }
    await cleanup();
  }

  if (exitCode !== 0) process.exit(exitCode);
}
|
|
|
|
// Run the harness. main() reports most failures itself; this handler covers
// anything thrown outside its try block (or while writing the error report),
// so the promise is never left floating.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|