Files
Ferrous-Solitaire/solitaire_server/e2e/scripts/cycle_metrics.js
T
funman300 d45b7cb82b
Build and Deploy / build-and-push (push) Successful in 1m6s
Web E2E / web-e2e (push) Successful in 4m40s
feat(e2e): add Playwright browser test suite for web routes
solitaire_server/e2e/:
- smoke.spec.js: verifies /play-classic loads, exposes window.__FERROUS_DEBUG__
  bridge, keyboard parity (Space=draw, U=undo), debug failure report, and
  replay payload builder exports schema-v2 moves.
- gameplay_review.spec.js: HUD/controls render check, stock-click + undo
  player flow, draw-mode toggle, autonomous play invariant batch, and
  cycle-detection regression guard.
- cycle_metrics.js: headless cycle-rate analysis tool; run via
  `npm run review:cycles` with configurable policy, game count, and
  thresholds. Regression gate baked into package.json scripts.
- playwright.config.js: targets the local server at http://localhost:8080.
- package.json / package-lock.json: @playwright/test 1.60.0.

.gitea/workflows/web-e2e.yml:
- Runs on pushes to solitaire_server/, solitaire_wasm/, solitaire_core/,
  or Cargo changes. Starts the server binary, waits for /health, runs
  the full Playwright suite, uploads test-results/ on failure.

docs/testing-architecture.md: documents the three-tier test strategy
  (unit → Playwright smoke → cycle regression) and the __FERROUS_DEBUG__
  bridge contract.

scripts/update_quaternions_deps.sh: helper to bump the Quaternions
  registry deps (klondike, card_game) by version and run the full
  safety gate including deterministic replay checks.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-02 12:40:30 -07:00

378 lines
15 KiB
JavaScript

#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const { spawn } = require("child_process");
const { chromium } = require("playwright");
// Port handed to the spawned server through the SERVER_PORT environment variable.
const serverPort = 4173;
// Base URL used for the /health probe and for loading the /play-classic pages.
const baseUrl = `http://127.0.0.1:${serverPort}`;
// Three directories above this script's directory; used as the working
// directory when the server is started with `cargo run`.
const repoRoot = path.resolve(__dirname, "../../..");
/**
 * Look up the value that follows a command-line flag.
 *
 * @param {string} name - Flag to search for in `process.argv` (e.g. "--games").
 * @param {*} fallback - Returned when the flag is absent or is the last argument.
 * @returns {*} The argument immediately after `name`, or `fallback`.
 */
function readArg(name, fallback) {
  const { argv } = process;
  const flagAt = argv.indexOf(name);
  const valueAt = flagAt + 1;
  // The flag must exist and must be followed by at least one more argument.
  const hasValue = flagAt >= 0 && valueAt < argv.length;
  return hasValue ? argv[valueAt] : fallback;
}
/**
 * Read a flag's value as a base-10 integer that must be greater than zero.
 *
 * @param {string} name - Flag whose value should be read.
 * @param {number} fallback - Returned when the flag is missing, is not a
 *   number, or is zero/negative.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveInt(name, fallback) {
  const rawText = readArg(name, String(fallback));
  const parsed = Number.parseInt(rawText, 10);
  const usable = !Number.isNaN(parsed) && parsed > 0;
  return usable ? parsed : fallback;
}
/**
 * Read an optional numeric threshold from the command line.
 *
 * A threshold that cannot be used is still reported as `null` (so callers keep
 * treating it as "not set"), but a warning is written to stderr so that a
 * mistyped value does not silently switch a regression check off.
 *
 * @param {string} name - Flag whose value should be read.
 * @returns {number|null} The finite number given for the flag, or `null` when
 *   the flag is absent, has no value, or its value is not a finite number.
 */
function parseOptionalNumber(name) {
  const raw = readArg(name, null);
  if (raw === null) {
    // readArg also falls back when the flag is the last argument; that is a
    // user mistake rather than "threshold not requested".
    if (process.argv.includes(name)) {
      console.error(`warning: ${name} was given without a value; threshold ignored`);
    }
    return null;
  }
  const value = Number.parseFloat(raw);
  if (Number.isFinite(value)) return value;
  console.error(
    `warning: ${name} value ${JSON.stringify(raw)} is not a finite number; threshold ignored`
  );
  return null;
}
/**
 * Report whether a bare flag appears anywhere on the command line.
 *
 * @param {string} name - Flag to look for in `process.argv`.
 * @returns {boolean} `true` when some argument equals `name`.
 */
function hasFlag(name) {
  const args = process.argv;
  return args.some((arg) => arg === name);
}
/**
 * Count how many games fall into each reporting bucket.
 *
 * @param {Array<object>} results - Per-game records; each is read for
 *   `terminal`, `issues`, `page_errors`, `console_errors` and `draw3`.
 * @returns {object} Counts keyed by bucket name (total games, one count per
 *   terminal state, games with issues/errors, and the draw-1 / draw-3 split).
 */
function buildSummary(results) {
  // Number of records for which `matches` returns a truthy value.
  const tally = (matches) => results.reduce((n, r) => n + (matches(r) ? 1 : 0), 0);
  const endedAs = (label) => tally((r) => r.terminal === label);

  return {
    games_played: results.length,
    won: endedAs("won"),
    no_moves: endedAs("no_moves"),
    cycle: endedAs("cycle"),
    step_budget: endedAs("step_budget"),
    apply_failed: endedAs("apply_failed"),
    invariant_failed: endedAs("invariant_failed"),
    missing_snapshot: endedAs("missing_snapshot"),
    games_with_issues: tally((r) => r.issues.length > 0),
    with_page_errors: tally((r) => r.page_errors > 0),
    with_console_errors: tally((r) => r.console_errors > 0),
    draw1: tally((r) => !r.draw3),
    draw3: tally((r) => r.draw3),
  };
}
/**
 * Compute win / cycle / step-budget counts and percentages for a subset of games.
 *
 * @param {Array<object>} results - Per-game records (read for `terminal`).
 * @param {Function} predicate - Filter callback selecting the games to include.
 * @returns {object} `games` plus, for each of won / cycle / step_budget, the
 *   raw count and its share of the subset as a percentage rounded to one
 *   decimal place (0 when the subset is empty).
 */
function withRates(results, predicate) {
  const subset = results.filter(predicate);
  const total = subset.length;

  // Single pass over the subset, counting only the three terminals reported.
  const counts = { won: 0, cycle: 0, step_budget: 0 };
  for (const record of subset) {
    if (record.terminal === "won") counts.won += 1;
    else if (record.terminal === "cycle") counts.cycle += 1;
    else if (record.terminal === "step_budget") counts.step_budget += 1;
  }

  const asPct = (count) => {
    if (total === 0) return 0;
    return Number(((count / total) * 100).toFixed(1));
  };

  return {
    games: total,
    won: counts.won,
    win_rate_pct: asPct(counts.won),
    cycle: counts.cycle,
    cycle_rate_pct: asPct(counts.cycle),
    step_budget: counts.step_budget,
    step_budget_rate_pct: asPct(counts.step_budget),
  };
}
/**
 * Poll the server's /health endpoint until it answers with a success status.
 *
 * Each probe carries its own abort timeout so that a server which accepts the
 * connection but never responds cannot stall the loop beyond the overall
 * deadline.
 *
 * @param {number} [timeoutMs=120000] - Overall time budget in milliseconds.
 * @returns {Promise<boolean>} `true` as soon as a probe returns an OK
 *   response, `false` once the budget is used up.
 */
async function waitForHealth(timeoutMs = 120_000) {
  const perRequestCapMs = 5_000;
  const retryDelayMs = 400;
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    // Never let a single probe outlive what is left of the overall budget.
    const remainingMs = Math.max(1, deadline - Date.now());
    try {
      const res = await fetch(`${baseUrl}/health`, {
        signal: AbortSignal.timeout(Math.min(perRequestCapMs, remainingMs)),
      });
      if (res.ok) return true;
    } catch {
      // server not ready yet (connection refused, aborted probe, ...)
    }
    await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
  }
  return false;
}
/**
 * Launch the solitaire server through `cargo run` as a child process.
 *
 * The child runs from the repository root with the parent's environment plus
 * the overrides below; stdin is ignored and stdout/stderr are piped back so
 * the caller can capture the log.
 *
 * @returns {ChildProcess} Handle returned by `spawn`.
 */
function startServer() {
  const cargoArgs = ["run", "-p", "solitaire_server", "--quiet"];
  const overrides = {
    SQLX_OFFLINE: "true",
    DATABASE_URL: "sqlite://ferrous-solitaire-e2e.db",
    JWT_SECRET: "ferrous_solitaire_e2e_secret_32_chars_long",
    SERVER_PORT: String(serverPort),
  };
  // Overrides come last so they win over anything inherited from the parent.
  const env = Object.assign({}, process.env, overrides);
  const options = {
    cwd: repoRoot,
    env,
    stdio: ["ignore", "pipe", "pipe"],
  };
  return spawn("cargo", cargoArgs, options);
}
/**
 * List the problems observed in a single autoplay run.
 *
 * @param {object} run - Result object returned from the in-page autoplay call.
 * @param {number} pageErrorCount - Uncaught page errors seen during the game.
 * @param {number} consoleErrorCount - Console messages of type "error" seen.
 * @returns {string[]} Issue tags, empty when the run was clean.
 */
function collectRunIssues(run, pageErrorCount, consoleErrorCount) {
  const issues = [];
  if (!run.ok) issues.push(`autoplay_failed:${run.error || run.terminal || "unknown"}`);
  if (!run.invariant_ok) issues.push("invariant_not_ok");
  if (run.terminal === "invariant_failed") issues.push("invariant_failed_terminal");
  if (run.terminal === "missing_snapshot") issues.push("missing_snapshot_terminal");
  if (run.terminal === "apply_failed") issues.push(`apply_failed:${run.error}`);
  if (run.history_len > 0 && !run.replay_payload_present) {
    issues.push("missing_replay_payload_after_moves");
  }
  if (
    run.replay_payload_present &&
    run.history_len !== null &&
    run.replay_moves_len !== run.history_len
  ) {
    issues.push(`replay_history_mismatch:${run.replay_moves_len}/${run.history_len}`);
  }
  if (pageErrorCount > 0) issues.push(`page_errors:${pageErrorCount}`);
  if (consoleErrorCount > 0) issues.push(`console_errors:${consoleErrorCount}`);
  return issues;
}

/**
 * Compare measured rates and counts against the configured thresholds.
 *
 * @param {object} rates - `{ all, draw1, draw3 }`, each holding the `*_pct` fields.
 * @param {object} summary - Bucket counts for the whole run.
 * @param {object} thresholds - Threshold values (`null` means "not set") plus
 *   the `require_zero_issues` switch.
 * @returns {string[]} One human-readable message per violated threshold.
 */
function findRegressionFailures(rates, summary, thresholds) {
  const failures = [];
  const checkMax = (scope, metric, limit) => {
    const actual = rates[scope][metric];
    if (limit !== null && actual > limit) {
      failures.push(`${scope}.${metric} ${actual}% > ${limit}%`);
    }
  };

  checkMax("all", "cycle_rate_pct", thresholds.max_cycle_rate_all);
  checkMax("draw1", "cycle_rate_pct", thresholds.max_cycle_rate_draw1);
  checkMax("draw3", "cycle_rate_pct", thresholds.max_cycle_rate_draw3);
  if (
    thresholds.min_win_rate_all !== null &&
    rates.all.win_rate_pct < thresholds.min_win_rate_all
  ) {
    failures.push(`all.win_rate_pct ${rates.all.win_rate_pct}% < ${thresholds.min_win_rate_all}%`);
  }
  checkMax("all", "step_budget_rate_pct", thresholds.max_step_budget_rate_all);
  checkMax("draw1", "step_budget_rate_pct", thresholds.max_step_budget_rate_draw1);
  checkMax("draw3", "step_budget_rate_pct", thresholds.max_step_budget_rate_draw3);

  if (thresholds.require_zero_issues) {
    const mustBeZero = [
      "games_with_issues",
      "apply_failed",
      "invariant_failed",
      "missing_snapshot",
      "with_page_errors",
      "with_console_errors",
    ];
    for (const key of mustBeZero) {
      if (summary[key] > 0) failures.push(`${key} ${summary[key]} > 0`);
    }
  }
  return failures;
}

/**
 * Run the cycle-metrics analysis end to end.
 *
 * Starts the server, waits for it to become healthy, plays `--games` seeded
 * games in headless Chromium (odd game indexes add the draw3 query parameter),
 * writes the full report to `--out`, prints the condensed report to stdout
 * and evaluates the regression thresholds.
 *
 * Exit behaviour: returns normally on success, exits with code 2 when a
 * threshold is violated and with code 1 when the run itself fails (in which
 * case `--out` receives the error and the tail of the server log instead).
 * The browser and the server process are shut down on every path.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const games = parsePositiveInt("--games", 1000);
  const maxSteps = parsePositiveInt("--steps", 350);
  const maxVisitsPerState = parsePositiveInt("--max-visits", 2);
  const policyArg = readArg("--policy", "loop_aware");
  // Anything other than "baseline" selects the loop-aware policy.
  const policy = policyArg === "baseline" ? "baseline" : "loop_aware";
  const outPath = readArg("--out", "/tmp/playwright-cycle-metrics.json");
  // Key names and order match the `regression.thresholds` section of the report.
  const thresholds = {
    max_cycle_rate_all: parseOptionalNumber("--max-cycle-rate-all"),
    max_cycle_rate_draw1: parseOptionalNumber("--max-cycle-rate-draw1"),
    max_cycle_rate_draw3: parseOptionalNumber("--max-cycle-rate-draw3"),
    max_step_budget_rate_all: parseOptionalNumber("--max-step-budget-rate-all"),
    max_step_budget_rate_draw1: parseOptionalNumber("--max-step-budget-rate-draw1"),
    max_step_budget_rate_draw3: parseOptionalNumber("--max-step-budget-rate-draw3"),
    min_win_rate_all: parseOptionalNumber("--min-win-rate-all"),
    require_zero_issues: hasFlag("--require-zero-issues"),
  };

  // Only the tail of the server log is ever reported, so only the tail is kept.
  const serverLogTailChars = 5000;
  const server = startServer();
  let serverLog = "";
  const appendServerLog = (chunk) => {
    serverLog = (serverLog + chunk.toString()).slice(-serverLogTailChars);
  };
  server.stdout.on("data", appendServerLog);
  server.stderr.on("data", appendServerLog);

  // Rejects if the server cannot be spawned or exits. Raced against the health
  // probe so a failed start is reported at once instead of after the full
  // health timeout; the 'error' listener also keeps a failed spawn from
  // surfacing as an uncaught exception.
  const serverStopped = new Promise((_, reject) => {
    server.on("error", (err) => reject(new Error(`server_spawn_failed: ${err.message}`)));
    server.on("exit", (code, signal) =>
      reject(new Error(`server_exited_before_healthy: code=${code} signal=${signal}`))
    );
  });
  // The rejection is expected once cleanup stops the server; mark it handled.
  serverStopped.catch(() => {});

  // `server.killed` only records that a signal was sent, so it cannot tell
  // whether the child has actually gone; the exit fields can.
  const serverStillRunning = () => server.exitCode === null && server.signalCode === null;
  const cleanup = async () => {
    if (!serverStillRunning()) return;
    server.kill("SIGTERM");
    await new Promise((resolve) => setTimeout(resolve, 500));
    if (serverStillRunning()) server.kill("SIGKILL");
  };

  let browser = null;
  let exitCode = 0;
  try {
    const healthy = await Promise.race([waitForHealth(), serverStopped]);
    if (!healthy) {
      throw new Error("server_failed_health_check");
    }
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext();
    const page = await context.newPage();

    // Listeners are attached once; the arrays they fill are swapped per game.
    let pageErrors = [];
    let consoleErrors = [];
    page.on("pageerror", (err) => pageErrors.push(String(err)));
    page.on("console", (msg) => {
      if (msg.type() === "error") {
        consoleErrors.push(msg.text());
      }
    });

    const results = [];
    for (let i = 0; i < games; i++) {
      const seed = i;
      const draw3 = i % 2 === 1;
      // NOTE(review): draw3 is sent with an empty value; presumably the page
      // only checks for the parameter's presence - confirm.
      const suffix = draw3 ? "&draw3=" : "";
      pageErrors = [];
      consoleErrors = [];
      await page.goto(`${baseUrl}/play-classic?seed=${seed}${suffix}`, {
        waitUntil: "domcontentloaded",
      });
      const resumeVisible = await page
        .locator("#resume-overlay:not(.hidden)")
        .isVisible()
        .catch(() => false);
      if (resumeVisible) {
        // A saved game would pre-empt the seeded deal; drop it and reload.
        await page.evaluate(() => localStorage.removeItem("fs_game_save"));
        await page.reload({ waitUntil: "domcontentloaded" });
      }
      await page.waitForFunction(
        () =>
          typeof window.__FERROUS_DEBUG__ === "object" &&
          window.__FERROUS_DEBUG__.seed() !== null,
        null,
        { timeout: 30_000 }
      );
      const run = await page.evaluate(({ stepCap, policyName, maxVisits }) => {
        const debug = window.__FERROUS_DEBUG__;
        const outcome = debug.runAutoplay({
          maxSteps: stepCap,
          maxVisitsPerState: maxVisits,
          policy: policyName,
        });
        const snap = outcome.snapshot || debug.snapshot();
        const payload = debug.replayPayload();
        return {
          terminal: outcome.terminal || "unknown",
          step: outcome.step ?? -1,
          error: outcome.error || outcome.reason || null,
          ok: !!outcome.ok,
          score: snap?.state?.score ?? null,
          move_count: snap?.state?.move_count ?? null,
          invariant_ok: !!snap?.invariants?.state_ok,
          history_len: Array.isArray(snap?.move_history) ? snap.move_history.length : null,
          replay_payload_present: payload !== null,
          replay_moves_len: Array.isArray(payload?.moves) ? payload.moves.length : 0,
        };
      }, { stepCap: maxSteps, policyName: policy, maxVisits: maxVisitsPerState });

      const issues = collectRunIssues(run, pageErrors.length, consoleErrors.length);
      results.push({
        game_index: i,
        seed,
        draw3,
        ...run,
        page_errors: pageErrors.length,
        console_errors: consoleErrors.length,
        issues,
      });
      if ((i + 1) % 100 === 0) {
        console.error(`progress ${i + 1}/${games}`);
      }
    }

    const summary = buildSummary(results);
    const rates = {
      all: withRates(results, () => true),
      draw1: withRates(results, (r) => !r.draw3),
      draw3: withRates(results, (r) => r.draw3),
    };
    const regressionFailures = findRegressionFailures(rates, summary, thresholds);
    const out = {
      config: {
        games,
        max_steps: maxSteps,
        policy,
        max_visits_per_state: maxVisitsPerState,
      },
      summary,
      rates,
      regression: {
        pass: regressionFailures.length === 0,
        thresholds,
        failures: regressionFailures,
      },
      issue_samples: results
        .filter((r) => r.issues.length > 0)
        .slice(0, 25)
        .map((r) => ({
          seed: r.seed,
          draw3: r.draw3,
          terminal: r.terminal,
          issues: r.issues,
          score: r.score,
          move_count: r.move_count,
        })),
      cycle_samples: results
        .filter((r) => r.terminal === "cycle")
        .slice(0, 25)
        .map((r) => ({
          seed: r.seed,
          draw3: r.draw3,
          step: r.step,
          score: r.score,
          move_count: r.move_count,
        })),
    };
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, JSON.stringify({ out, results }, null, 2));
    console.log(JSON.stringify(out, null, 2));
    if (regressionFailures.length > 0) {
      console.error(`regression check failed: ${regressionFailures.join("; ")}`);
      exitCode = 2;
    }
  } catch (err) {
    const errorOut = {
      error: String(err),
      server_log_tail: serverLog.slice(-serverLogTailChars),
    };
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, JSON.stringify(errorOut, null, 2));
    console.error(JSON.stringify(errorOut, null, 2));
    exitCode = 1;
  } finally {
    // Runs on success, threshold failure and error alike so neither the
    // browser nor the server outlives the script.
    if (browser !== null) {
      await browser
        .close()
        .catch((closeErr) => console.error(`browser_close_failed: ${String(closeErr)}`));
    }
    await cleanup();
  }
  if (exitCode !== 0) process.exit(exitCode);
}
// Script entry point: kick off the run. Failures are reported and turned into
// a non-zero exit code inside main().
main();