#!/usr/bin/env node
/**
 * Autoplay cycle-metrics harness.
 *
 * Starts the `solitaire_server` crate via `cargo run`, waits for its
 * `/health` endpoint, then uses Playwright/Chromium to load one page per
 * game and run the in-page `window.__FERROUS_DEBUG__.runAutoplay` hook.
 * Per-game results are aggregated into a JSON report that is written to
 * `--out` and printed to stdout.
 *
 * Exit codes: 0 on success, 2 when a regression threshold is violated,
 * 1 on any other error (an error report is written to `--out` instead).
 *
 * Flags read from argv: --games, --steps, --max-visits, --policy, --out,
 * --route, --max-cycle-rate-{all,draw1,draw3},
 * --max-step-budget-rate-{all,draw1,draw3}, --min-win-rate-all,
 * --require-zero-issues.
 */
const fs = require("fs");
const path = require("path");
const { spawn } = require("child_process");
const { chromium } = require("playwright");

const serverPort = 4173;
const baseUrl = `http://127.0.0.1:${serverPort}`;
const repoRoot = path.resolve(__dirname, "../../..");

// Only the last 5000 characters of the server log are ever reported, so the
// buffer is capped (with headroom) instead of growing for the whole run.
const serverLogMaxChars = 20_000;

/**
 * Returns the argv token that follows `name`.
 *
 * @param {string} name - Flag to look for, e.g. "--games".
 * @param {*} fallback - Returned when the flag is absent or is the last token.
 * @returns {*} The following argv token (a string) or `fallback`.
 */
function readArg(name, fallback) {
  const idx = process.argv.indexOf(name);
  if (idx < 0 || idx + 1 >= process.argv.length) return fallback;
  return process.argv[idx + 1];
}

/**
 * Reads a flag as a base-10 integer greater than zero.
 *
 * @param {string} name - Flag name.
 * @param {number} fallback - Used when the flag is missing, unparsable or <= 0.
 * @returns {number}
 */
function parsePositiveInt(name, fallback) {
  const value = Number.parseInt(readArg(name, String(fallback)), 10);
  if (Number.isNaN(value) || value <= 0) return fallback;
  return value;
}

/**
 * Reads a flag as a finite floating-point number.
 *
 * @param {string} name - Flag name.
 * @returns {number|null} The parsed number, or null when the flag is absent
 *   or its value is not a finite number.
 */
function parseOptionalNumber(name) {
  const raw = readArg(name, null);
  if (raw === null) return null;
  const value = Number.parseFloat(raw);
  return Number.isFinite(value) ? value : null;
}

/**
 * @param {string} name - Flag name.
 * @returns {boolean} True when `name` appears anywhere in argv.
 */
function hasFlag(name) {
  return process.argv.includes(name);
}

/**
 * Counts games per terminal state and per issue category.
 *
 * @param {Array<object>} results - Per-game records as built in `main`
 *   (reads `terminal`, `issues`, `page_errors`, `console_errors`, `draw3`).
 * @returns {object} Counters keyed by snake_case category name.
 */
function buildSummary(results) {
  const countTerminal = (terminal) =>
    results.filter((r) => r.terminal === terminal).length;

  return {
    games_played: results.length,
    won: countTerminal("won"),
    no_moves: countTerminal("no_moves"),
    cycle: countTerminal("cycle"),
    step_budget: countTerminal("step_budget"),
    apply_failed: countTerminal("apply_failed"),
    invariant_failed: countTerminal("invariant_failed"),
    missing_snapshot: countTerminal("missing_snapshot"),
    games_with_issues: results.filter((r) => r.issues.length > 0).length,
    with_page_errors: results.filter((r) => r.page_errors > 0).length,
    with_console_errors: results.filter((r) => r.console_errors > 0).length,
    draw1: results.filter((r) => !r.draw3).length,
    draw3: results.filter((r) => r.draw3).length,
  };
}

/**
 * Computes win / cycle / step-budget counts and percentages for the subset
 * of results selected by `predicate`.
 *
 * @param {Array<object>} results - Per-game records (reads `terminal`).
 * @param {(r: object) => boolean} predicate - Subset selector.
 * @returns {object} Counts plus `*_rate_pct` values rounded to one decimal;
 *   every rate is 0 when the subset is empty.
 */
function withRates(results, predicate) {
  const subset = results.filter(predicate);
  const countTerminal = (terminal) =>
    subset.filter((r) => r.terminal === terminal).length;
  const pct = (count) =>
    subset.length === 0
      ? 0
      : Number(((count / subset.length) * 100).toFixed(1));

  const won = countTerminal("won");
  const cycle = countTerminal("cycle");
  const stepBudget = countTerminal("step_budget");

  return {
    games: subset.length,
    won,
    win_rate_pct: pct(won),
    cycle,
    cycle_rate_pct: pct(cycle),
    step_budget: stepBudget,
    step_budget_rate_pct: pct(stepBudget),
  };
}

/**
 * Polls `${baseUrl}/health` every 400 ms until it answers with an OK status.
 *
 * @param {number} [timeoutMs=120000] - Give up after this many milliseconds.
 * @param {() => boolean} [shouldAbort] - Checked before every attempt; when
 *   it returns true the wait ends immediately. Defaults to never aborting.
 * @returns {Promise<boolean>} True once the endpoint is healthy, false on
 *   timeout or abort.
 */
async function waitForHealth(timeoutMs = 120_000, shouldAbort = () => false) {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    if (shouldAbort()) return false;
    try {
      const res = await fetch(`${baseUrl}/health`);
      if (res.ok) return true;
    } catch {
      // server not ready yet
    }
    await new Promise((resolve) => setTimeout(resolve, 400));
  }
  return false;
}

/**
 * Spawns `cargo run -p solitaire_server --quiet` in the repository root with
 * the e2e environment overrides, piping stdout and stderr.
 *
 * @returns {import("child_process").ChildProcess}
 */
function startServer() {
  return spawn("cargo", ["run", "-p", "solitaire_server", "--quiet"], {
    cwd: repoRoot,
    env: {
      ...process.env,
      SQLX_OFFLINE: "true",
      DATABASE_URL: "sqlite://ferrous-solitaire-e2e.db",
      JWT_SECRET: "ferrous_solitaire_e2e_secret_32_chars_long",
      SERVER_PORT: String(serverPort),
    },
    stdio: ["ignore", "pipe", "pipe"],
  });
}

/**
 * Script entry point: parses flags, runs every game, writes the report and
 * terminates the process with the exit code described in the file header.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const games = parsePositiveInt("--games", 1000);
  const maxSteps = parsePositiveInt("--steps", 350);
  const maxVisitsPerState = parsePositiveInt("--max-visits", 2);
  const policyArg = readArg("--policy", "loop_aware");
  const policy = policyArg === "baseline" ? "baseline" : "loop_aware";
  const outPath = readArg("--out", "/tmp/playwright-cycle-metrics.json");
  // --route play-classic (default) or --route play
  const routeArg = readArg("--route", "play-classic");
  const route = routeArg === "play" ? "play" : "play-classic";
  const maxCycleRateAll = parseOptionalNumber("--max-cycle-rate-all");
  const maxCycleRateDraw1 = parseOptionalNumber("--max-cycle-rate-draw1");
  const maxCycleRateDraw3 = parseOptionalNumber("--max-cycle-rate-draw3");
  const maxStepBudgetRateAll = parseOptionalNumber("--max-step-budget-rate-all");
  const maxStepBudgetRateDraw1 = parseOptionalNumber("--max-step-budget-rate-draw1");
  const maxStepBudgetRateDraw3 = parseOptionalNumber("--max-step-budget-rate-draw3");
  const minWinRateAll = parseOptionalNumber("--min-win-rate-all");
  const requireZeroIssues = hasFlag("--require-zero-issues");

  const server = startServer();
  let serverLog = "";
  let spawnError = null;
  const appendServerLog = (text) => {
    serverLog = (serverLog + text).slice(-serverLogMaxChars);
  };
  server.stdout.on("data", (d) => appendServerLog(d.toString()));
  server.stderr.on("data", (d) => appendServerLog(d.toString()));
  // Without a listener a failed spawn (e.g. `cargo` missing from PATH) is an
  // unhandled 'error' event that crashes the script before any report is
  // written. Record it so it shows up in `server_log_tail` instead.
  server.on("error", (err) => {
    spawnError = err;
    appendServerLog(`spawn_error: ${String(err)}\n`);
  });

  // `server.killed` only says a signal was delivered, not that the child
  // exited, so liveness is derived from exitCode/signalCode instead.
  const serverIsGone = () =>
    spawnError !== null || server.exitCode !== null || server.signalCode !== null;

  const cleanup = async () => {
    if (serverIsGone()) return;
    server.kill("SIGTERM");
    await new Promise((resolve) => setTimeout(resolve, 500));
    // Escalate only when the child is still running after the grace period.
    if (!serverIsGone()) server.kill("SIGKILL");
  };

  // Declared outside `try` so the error path can close it as well.
  let browser = null;

  try {
    // `undefined` keeps the default timeout; the predicate stops the wait
    // early when the server process has already died.
    if (!(await waitForHealth(undefined, serverIsGone))) {
      throw new Error("server_failed_health_check");
    }

    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext();
    const page = await context.newPage();

    const results = [];
    for (let i = 0; i < games; i++) {
      const seed = i;
      // Odd game indices are flagged draw3 and get the extra query parameter.
      const draw3 = i % 2 === 1;
      const suffix = draw3 ? "&draw3=" : "";

      // The page is reused across games, so listeners from the previous game
      // are dropped before the fresh per-game collectors are attached.
      const pageErrors = [];
      const consoleErrors = [];
      page.removeAllListeners("pageerror");
      page.removeAllListeners("console");
      page.on("pageerror", (err) => pageErrors.push(String(err)));
      page.on("console", (msg) => {
        if (msg.type() === "error") {
          consoleErrors.push(msg.text());
        }
      });

      await page.goto(`${baseUrl}/${route}?seed=${seed}${suffix}`, {
        waitUntil: "domcontentloaded",
      });

      if (route === "play-classic") {
        // A visible resume overlay is handled by deleting the stored save
        // and reloading the page.
        const resumeVisible = await page
          .locator("#resume-overlay:not(.hidden)")
          .isVisible()
          .catch(() => false);
        if (resumeVisible) {
          await page.evaluate(() => localStorage.removeItem("fs_game_save"));
          await page.reload({ waitUntil: "domcontentloaded" });
        }
      }

      await page.waitForFunction(
        () =>
          typeof window.__FERROUS_DEBUG__ === "object" &&
          window.__FERROUS_DEBUG__.seed() !== null,
        null,
        { timeout: 30_000 }
      );

      // Everything inside this callback executes in the browser page.
      const run = await page.evaluate(
        ({ stepCap, policyName, maxVisits }) => {
          const debug = window.__FERROUS_DEBUG__;
          const run = debug.runAutoplay({
            maxSteps: stepCap,
            maxVisitsPerState: maxVisits,
            policy: policyName,
          });
          const snap = run.snapshot || debug.snapshot();
          const payload = debug.replayPayload();
          return {
            terminal: run.terminal || "unknown",
            step: run.step ?? -1,
            error: run.error || run.reason || null,
            ok: !!run.ok,
            score: snap?.state?.score ?? null,
            move_count: snap?.state?.move_count ?? null,
            invariant_ok: !!snap?.invariants?.state_ok,
            history_len: Array.isArray(snap?.move_history)
              ? snap.move_history.length
              : null,
            replay_payload_present: payload !== null,
            replay_moves_len: Array.isArray(payload?.moves)
              ? payload.moves.length
              : 0,
          };
        },
        { stepCap: maxSteps, policyName: policy, maxVisits: maxVisitsPerState }
      );

      const issues = [];
      if (!run.ok) issues.push(`autoplay_failed:${run.error || run.terminal || "unknown"}`);
      if (!run.invariant_ok) issues.push("invariant_not_ok");
      if (run.terminal === "invariant_failed") issues.push("invariant_failed_terminal");
      if (run.terminal === "missing_snapshot") issues.push("missing_snapshot_terminal");
      if (run.terminal === "apply_failed") issues.push(`apply_failed:${run.error}`);
      if (run.history_len > 0 && !run.replay_payload_present) {
        issues.push("missing_replay_payload_after_moves");
      }
      if (
        run.replay_payload_present &&
        run.history_len !== null &&
        run.replay_moves_len !== run.history_len
      ) {
        issues.push(`replay_history_mismatch:${run.replay_moves_len}/${run.history_len}`);
      }
      if (pageErrors.length > 0) issues.push(`page_errors:${pageErrors.length}`);
      if (consoleErrors.length > 0) issues.push(`console_errors:${consoleErrors.length}`);

      results.push({
        game_index: i,
        seed,
        draw3,
        ...run,
        page_errors: pageErrors.length,
        console_errors: consoleErrors.length,
        issues,
      });

      if ((i + 1) % 100 === 0) {
        console.error(`progress ${i + 1}/${games}`);
      }
    }

    const summary = buildSummary(results);
    const rates = {
      all: withRates(results, () => true),
      draw1: withRates(results, (r) => !r.draw3),
      draw3: withRates(results, (r) => r.draw3),
    };

    const regressionFailures = [];
    if (maxCycleRateAll !== null && rates.all.cycle_rate_pct > maxCycleRateAll) {
      regressionFailures.push(
        `all.cycle_rate_pct ${rates.all.cycle_rate_pct}% > ${maxCycleRateAll}%`
      );
    }
    if (maxCycleRateDraw1 !== null && rates.draw1.cycle_rate_pct > maxCycleRateDraw1) {
      regressionFailures.push(
        `draw1.cycle_rate_pct ${rates.draw1.cycle_rate_pct}% > ${maxCycleRateDraw1}%`
      );
    }
    if (maxCycleRateDraw3 !== null && rates.draw3.cycle_rate_pct > maxCycleRateDraw3) {
      regressionFailures.push(
        `draw3.cycle_rate_pct ${rates.draw3.cycle_rate_pct}% > ${maxCycleRateDraw3}%`
      );
    }
    if (minWinRateAll !== null && rates.all.win_rate_pct < minWinRateAll) {
      regressionFailures.push(
        `all.win_rate_pct ${rates.all.win_rate_pct}% < ${minWinRateAll}%`
      );
    }
    if (maxStepBudgetRateAll !== null && rates.all.step_budget_rate_pct > maxStepBudgetRateAll) {
      regressionFailures.push(
        `all.step_budget_rate_pct ${rates.all.step_budget_rate_pct}% > ${maxStepBudgetRateAll}%`
      );
    }
    if (
      maxStepBudgetRateDraw1 !== null &&
      rates.draw1.step_budget_rate_pct > maxStepBudgetRateDraw1
    ) {
      regressionFailures.push(
        `draw1.step_budget_rate_pct ${rates.draw1.step_budget_rate_pct}% > ${maxStepBudgetRateDraw1}%`
      );
    }
    if (
      maxStepBudgetRateDraw3 !== null &&
      rates.draw3.step_budget_rate_pct > maxStepBudgetRateDraw3
    ) {
      regressionFailures.push(
        `draw3.step_budget_rate_pct ${rates.draw3.step_budget_rate_pct}% > ${maxStepBudgetRateDraw3}%`
      );
    }
    if (requireZeroIssues) {
      if (summary.games_with_issues > 0) {
        regressionFailures.push(`games_with_issues ${summary.games_with_issues} > 0`);
      }
      if (summary.apply_failed > 0) {
        regressionFailures.push(`apply_failed ${summary.apply_failed} > 0`);
      }
      if (summary.invariant_failed > 0) {
        regressionFailures.push(`invariant_failed ${summary.invariant_failed} > 0`);
      }
      if (summary.missing_snapshot > 0) {
        regressionFailures.push(`missing_snapshot ${summary.missing_snapshot} > 0`);
      }
      if (summary.with_page_errors > 0) {
        regressionFailures.push(`with_page_errors ${summary.with_page_errors} > 0`);
      }
      if (summary.with_console_errors > 0) {
        regressionFailures.push(`with_console_errors ${summary.with_console_errors} > 0`);
      }
    }

    const out = {
      config: {
        games,
        max_steps: maxSteps,
        policy,
        max_visits_per_state: maxVisitsPerState,
      },
      summary,
      rates,
      regression: {
        pass: regressionFailures.length === 0,
        thresholds: {
          max_cycle_rate_all: maxCycleRateAll,
          max_cycle_rate_draw1: maxCycleRateDraw1,
          max_cycle_rate_draw3: maxCycleRateDraw3,
          max_step_budget_rate_all: maxStepBudgetRateAll,
          max_step_budget_rate_draw1: maxStepBudgetRateDraw1,
          max_step_budget_rate_draw3: maxStepBudgetRateDraw3,
          min_win_rate_all: minWinRateAll,
          require_zero_issues: requireZeroIssues,
        },
        failures: regressionFailures,
      },
      // Samples are limited to the first 25 matching games.
      issue_samples: results
        .filter((r) => r.issues.length > 0)
        .slice(0, 25)
        .map((r) => ({
          seed: r.seed,
          draw3: r.draw3,
          terminal: r.terminal,
          issues: r.issues,
          score: r.score,
          move_count: r.move_count,
        })),
      cycle_samples: results
        .filter((r) => r.terminal === "cycle")
        .slice(0, 25)
        .map((r) => ({
          seed: r.seed,
          draw3: r.draw3,
          step: r.step,
          score: r.score,
          move_count: r.move_count,
        })),
    };

    // The file holds the report plus every per-game record; stdout gets the
    // report only.
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, JSON.stringify({ out, results }, null, 2));
    console.log(JSON.stringify(out, null, 2));

    if (regressionFailures.length > 0) {
      console.error(`regression check failed: ${regressionFailures.join("; ")}`);
      await browser.close();
      await cleanup();
      process.exit(2);
    }

    await browser.close();
    await cleanup();
  } catch (err) {
    const errorOut = {
      error: String(err),
      server_log_tail: serverLog.slice(-5000),
    };
    try {
      fs.mkdirSync(path.dirname(outPath), { recursive: true });
      fs.writeFileSync(outPath, JSON.stringify(errorOut, null, 2));
    } catch (writeErr) {
      // A failed report write must not skip the teardown below.
      console.error(`failed to write ${outPath}: ${String(writeErr)}`);
    }
    console.error(JSON.stringify(errorOut, null, 2));
    if (browser !== null) {
      // Best-effort: the original failure is what gets reported, so a
      // secondary close error is deliberately ignored.
      await browser.close().catch(() => {});
    }
    await cleanup();
    process.exit(1);
  }
}

main();