From d45b7cb82b28d22aefd3c4d7a156c02111155029 Mon Sep 17 00:00:00 2001 From: funman300 Date: Tue, 2 Jun 2026 12:40:30 -0700 Subject: [PATCH] feat(e2e): add Playwright browser test suite for web routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit solitaire_server/e2e/: - smoke.spec.js: verifies /play-classic loads, exposes window.__FERROUS_DEBUG__ bridge, keyboard parity (Space=draw, U=undo), debug failure report, and replay payload builder exports schema-v2 moves. - gameplay_review.spec.js: HUD/controls render check, stock-click + undo player flow, draw-mode toggle, and an autonomous-play invariant batch run under the loop-aware policy. - cycle_metrics.js: headless cycle-rate analysis tool; run via `npm run review:cycles` with configurable policy, game count, and thresholds. Regression gate baked into package.json scripts. - playwright.config.js: boots solitaire_server through the Playwright `webServer` config and targets it at http://127.0.0.1:4173. - package.json / package-lock.json: @playwright/test ^1.54.2, locked at 1.60.0. .gitea/workflows/web-e2e.yml: - Runs on pushes to master that touch solitaire_server/ (web, src, e2e), solitaire_wasm/, solitaire_core/, Cargo.toml/Cargo.lock, or the workflow itself, and on manual dispatch. Installs Rust, Node 20 and Chromium, runs the full Playwright suite (the `webServer` config starts the server and waits for /health), then runs the cycle regression gate. docs/testing-architecture.md: documents the three-layer test strategy (Rust unit tests → wasm debug-API integration tests → Playwright UI tests) and the __FERROUS_DEBUG__ bridge contract. scripts/update_quaternions_deps.sh: helper to bump the Quaternions registry deps (klondike, card_game) by version and run the full safety gate including deterministic replay checks. 
Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/web-e2e.yml | 49 +++ docs/testing-architecture.md | 115 ++++++ scripts/update_quaternions_deps.sh | 51 +++ solitaire_server/e2e/package-lock.json | 78 ++++ solitaire_server/e2e/package.json | 17 + solitaire_server/e2e/playwright.config.js | 41 ++ solitaire_server/e2e/scripts/cycle_metrics.js | 377 ++++++++++++++++++ .../e2e/tests/gameplay_review.spec.js | 93 +++++ solitaire_server/e2e/tests/smoke.spec.js | 66 +++ 9 files changed, 887 insertions(+) create mode 100644 .gitea/workflows/web-e2e.yml create mode 100644 docs/testing-architecture.md create mode 100755 scripts/update_quaternions_deps.sh create mode 100644 solitaire_server/e2e/package-lock.json create mode 100644 solitaire_server/e2e/package.json create mode 100644 solitaire_server/e2e/playwright.config.js create mode 100644 solitaire_server/e2e/scripts/cycle_metrics.js create mode 100644 solitaire_server/e2e/tests/gameplay_review.spec.js create mode 100644 solitaire_server/e2e/tests/smoke.spec.js diff --git a/.gitea/workflows/web-e2e.yml b/.gitea/workflows/web-e2e.yml new file mode 100644 index 0000000..7ad4804 --- /dev/null +++ b/.gitea/workflows/web-e2e.yml @@ -0,0 +1,49 @@ +name: Web E2E + +on: + push: + branches: [master] + paths: + - 'solitaire_server/web/**' + - 'solitaire_server/src/**' + - 'solitaire_server/e2e/**' + - 'solitaire_wasm/**' + - 'solitaire_core/**' + - 'Cargo.toml' + - 'Cargo.lock' + - '.gitea/workflows/web-e2e.yml' + workflow_dispatch: + +jobs: + web-e2e: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + cache: 'npm' + cache-dependency-path: solitaire_server/e2e/package-lock.json + + - name: Install e2e dependencies + working-directory: solitaire_server/e2e + run: npm ci + + - name: Install Playwright browser + working-directory: solitaire_server/e2e 
+ run: npx playwright install --with-deps chromium + + - name: Run web e2e tests + working-directory: solitaire_server/e2e + run: npm test + + - name: Run cycle regression gate + working-directory: solitaire_server/e2e + run: npm run review:cycles:regression diff --git a/docs/testing-architecture.md b/docs/testing-architecture.md new file mode 100644 index 0000000..bc1038c --- /dev/null +++ b/docs/testing-architecture.md @@ -0,0 +1,115 @@ +# Testing Architecture — Engine-first Validation + +Ferrous Solitaire validation is split into three layers with clear ownership: + +1. **Rust unit tests (`solitaire_core`)** + - move generation and legality + - deal generation determinism + - scoring and penalties + - undo semantics + - win detection + +2. **Engine integration tests (`solitaire_wasm` debug API)** + - autonomous game execution without UI/pointer simulation + - invariant checks after every move + - deterministic seed replay + - high-volume seeded runs (including long-running soak tests) + +3. **Playwright UI tests** + - verify rendering vs engine state + - drag/drop and keyboard UX behavior + - responsive layout behavior + - browser-compatibility checks + +## Source of truth + +The Rust engine is authoritative. Browser tests must interact with the game via +debug API hooks, not via pixel/OCR solving or hardcoded screen coordinates. 
+ +## Debug API surfaces + +Two automation surfaces are exposed: + +- `solitaire_wasm::SolitaireGame` methods: + - `debug_snapshot()` + - `debug_legal_moves()` + - `debug_move_history()` + - `debug_apply_legal_move(index)` + - `debug_apply_move_json(json)` +- Browser bridge on `game.html`: + - `window.__FERROUS_DEBUG__.snapshot()` + - `window.__FERROUS_DEBUG__.legalMoves()` + - `window.__FERROUS_DEBUG__.moveHistory()` + - `window.__FERROUS_DEBUG__.applyLegalMove(index)` + - `window.__FERROUS_DEBUG__.applyMove(move)` + - `window.__FERROUS_DEBUG__.failureReport()` + - `window.__FERROUS_DEBUG__.runAutoplay(options)` + +## Required failure payload + +Every automation failure should capture: + +- seed +- move history +- current game state +- screenshot +- browser trace +- console logs + +`failureReport()` provides the engine-side fields (`seed`, `moveHistory`, +`currentState`) so UI harnesses only need to attach browser artifacts. + +## Execution guidance + +- Fast verification: + - `cargo test -p solitaire_core -p solitaire_wasm` +- Full verification: + - `cargo test --workspace` + - `cargo clippy --workspace -- -D warnings` +- Long unattended soak: + - `cargo test -p solitaire_wasm debug_api_autonomous_thousands_seed_soak -- --ignored` + +### Browser e2e harness + +The Playwright suite lives under `solitaire_server/e2e/` and boots +`solitaire_server` via Playwright `webServer` config. 
+ +- Install + run: + - `cd solitaire_server/e2e` + - `npm ci` + - `npx playwright install chromium` + - `npm test` +- Cycle metrics batch run: + - `cd solitaire_server/e2e` + - `npm run review:cycles -- --games 1000 --steps 350 --policy baseline --max-visits 1 --out /tmp/cycle-baseline.json` + - `npm run review:cycles -- --games 1000 --steps 350 --policy loop_aware --max-visits 2 --out /tmp/cycle-loop-aware.json` + - `npm run review:cycles:regression` (thresholded gate, writes `test-results/cycle-regression.json`) + - `npm run review:cycles:candidate` (loop-aware candidate run, writes `test-results/cycle-candidate.json`) + +### Cycle-risk regression baseline and guardrails + +- Current regression gate command: + - `npm run review:cycles:regression` + - config: `games=240`, `steps=350`, `policy=baseline`, `max-visits=1` +- Current guardrail thresholds: + - `all.cycle_rate_pct <= 86` + - `draw1.cycle_rate_pct <= 76` + - `draw3.cycle_rate_pct <= 95` + - `all.win_rate_pct >= 14` + - zero invariant/apply/page/console issue counts +- Baseline sample (240 games): + - overall: `win_rate=15.8%`, `cycle_rate=84.2%` + - draw-one: `win_rate=25.8%`, `cycle_rate=74.2%` + - draw-three: `win_rate=5.8%`, `cycle_rate=94.2%` +- Candidate loop-aware sample (240 games, lookahead via simulated move + restore): + - overall: `win_rate=20.4%`, `cycle_rate=32.5%` + - draw-one: `win_rate=33.3%`, `cycle_rate=16.7%` + - draw-three: `win_rate=7.5%`, `cycle_rate=48.3%` + - no invariant/apply/page/console issues in the sampled run +- Additional 500-game candidate soak: + - overall: `win_rate=20.2%`, `cycle_rate=28.6%`, `step_budget=51.2%` + - draw-three remains the dominant risk (`cycle_rate=45.2%`) +- Fix applied: cycle metrics regression now supports explicit + `max_step_budget_rate_*` thresholds. Candidate command now enforces + `max_step_budget_rate_all <= 60` to prevent silent drift from cycles into + step-budget stalls. 
diff --git a/scripts/update_quaternions_deps.sh b/scripts/update_quaternions_deps.sh
new file mode 100755
index 0000000..0aa3e32
--- /dev/null
+++ b/scripts/update_quaternions_deps.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# Update Quaternions registry dependencies and run the full safety gate.
+#
+# Usage:
+#   scripts/update_quaternions_deps.sh <klondike_version> <card_game_version>
+#
+# Example:
+#   scripts/update_quaternions_deps.sh 0.3.1 0.4.1
+#
+# This script updates Cargo.lock to the requested versions (within the semver
+# ranges already declared in Cargo.toml), then runs the project's required
+# verification steps plus deterministic replay checks.
+set -euo pipefail
+
+if [ "$#" -ne 2 ]; then
+  echo "usage: $0 <klondike_version> <card_game_version>" >&2
+  exit 2
+fi
+
+KLONDIKE_VERSION="$1"
+CARD_GAME_VERSION="$2"
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+cd "$REPO_ROOT"
+
+echo ">>> Quaternions registry:"
+echo " https://git.aleshym.co/api/packages/Quaternions/cargo/"
+echo
+echo ">>> Review upstream release notes / changelogs before proceeding:"
+echo " - https://git.aleshym.co/Quaternions/card_game"
+echo " - https://git.aleshym.co/Quaternions/klondike"
+echo
+
+echo ">>> Updating lockfile to klondike=$KLONDIKE_VERSION card_game=$CARD_GAME_VERSION"
+cargo update -p klondike --precise "$KLONDIKE_VERSION"
+cargo update -p card_game --precise "$CARD_GAME_VERSION"
+
+echo ">>> Verifying dependency graph"
+cargo tree -p solitaire_core --depth 2 | cat
+
+echo ">>> Running workspace tests"
+cargo test --workspace
+
+echo ">>> Running workspace clippy"
+cargo clippy --workspace -- -D warnings
+
+echo ">>> Running deterministic replay / debug-api smoke checks"
+cargo test -p solitaire_wasm debug_snapshot_exposes_replayable_seed_and_history -- --exact
+cargo test -p solitaire_wasm debug_api_autonomous_seed_batch_smoke -- --exact
+
+echo ">>> Quaternions dependency upgrade gate passed"
diff --git a/solitaire_server/e2e/package-lock.json b/solitaire_server/e2e/package-lock.json
new file mode 100644
index 0a3c7e8
--- /dev/null +++ b/solitaire_server/e2e/package-lock.json @@ -0,0 +1,78 @@ +{ + "name": "ferrous-solitaire-web-e2e", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ferrous-solitaire-web-e2e", + "version": "0.1.0", + "devDependencies": { + "@playwright/test": "^1.54.2" + } + }, + "node_modules/@playwright/test": { + "version": "1.60.0", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.60.0.tgz", + "integrity": "sha512-O71yZIbAh/PxDMNGns37GHBIfrVkEVyn+AXyIa5dOTfb4/xNvRWV+Vv/NMbNCtODB/pO7vLlF2OTmMVLhmr7Ag==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.60.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.60.0", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.60.0.tgz", + "integrity": "sha512-hheHdokM8cdqCb0lcE3s+zT4t4W+vvjpGxsZlDnikarzx8tSzMebh3UiFtgqwFwnTnjYQcsyMF8ei2mCO/tpeA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.60.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.60.0", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.60.0.tgz", + "integrity": "sha512-9bW6zvX/m0lEbgTKJ6YppOKx8H3VOPBMOCFh2irXFOT4BbHgrx5hPjwJYLT40Lu+4qtD36qKc/Hn56StUW57IA==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": 
"cli.js" + }, + "engines": { + "node": ">=18" + } + } + } +} diff --git a/solitaire_server/e2e/package.json b/solitaire_server/e2e/package.json new file mode 100644 index 0000000..1380c3c --- /dev/null +++ b/solitaire_server/e2e/package.json @@ -0,0 +1,17 @@ +{ + "name": "ferrous-solitaire-web-e2e", + "private": true, + "version": "0.1.0", + "description": "Playwright browser e2e harness for Ferrous Solitaire web routes", + "scripts": { + "test": "playwright test", + "test:headed": "playwright test --headed", + "test:debug": "playwright test --debug", + "review:cycles": "node scripts/cycle_metrics.js", + "review:cycles:regression": "node scripts/cycle_metrics.js --games 240 --steps 350 --policy baseline --max-visits 1 --max-cycle-rate-all 86 --max-cycle-rate-draw1 76 --max-cycle-rate-draw3 95 --min-win-rate-all 14 --require-zero-issues --out test-results/cycle-regression.json", + "review:cycles:candidate": "node scripts/cycle_metrics.js --games 240 --steps 350 --policy loop_aware --max-visits 2 --max-cycle-rate-all 45 --max-cycle-rate-draw1 25 --max-cycle-rate-draw3 60 --max-step-budget-rate-all 60 --min-win-rate-all 18 --require-zero-issues --out test-results/cycle-candidate.json" + }, + "devDependencies": { + "@playwright/test": "^1.54.2" + } +} diff --git a/solitaire_server/e2e/playwright.config.js b/solitaire_server/e2e/playwright.config.js new file mode 100644 index 0000000..61801c5 --- /dev/null +++ b/solitaire_server/e2e/playwright.config.js @@ -0,0 +1,41 @@ +const path = require("path"); +const { defineConfig, devices } = require("@playwright/test"); + +const serverPort = 4173; +const repoRoot = path.resolve(__dirname, "../.."); + +module.exports = defineConfig({ + testDir: "./tests", + timeout: 30_000, + expect: { + timeout: 5_000, + }, + fullyParallel: false, + retries: process.env.CI ? 2 : 0, + workers: process.env.CI ? 
1 : undefined, + reporter: [["list"], ["html", { open: "never" }]], + use: { + baseURL: `http://127.0.0.1:${serverPort}`, + trace: "retain-on-failure", + screenshot: "only-on-failure", + video: "retain-on-failure", + }, + projects: [ + { + name: "chromium", + use: { ...devices["Desktop Chrome"] }, + }, + ], + webServer: { + command: + `SQLX_OFFLINE=true ` + + `DATABASE_URL=sqlite://ferrous-solitaire-e2e.db ` + + `JWT_SECRET=ferrous_solitaire_e2e_secret_32_chars_long ` + + `SERVER_PORT=${serverPort} ` + + `cargo run -p solitaire_server --quiet`, + cwd: repoRoot, + url: `http://127.0.0.1:${serverPort}/health`, + timeout: 120_000, + reuseExistingServer: !process.env.CI, + }, +}); diff --git a/solitaire_server/e2e/scripts/cycle_metrics.js b/solitaire_server/e2e/scripts/cycle_metrics.js new file mode 100644 index 0000000..fa29ab8 --- /dev/null +++ b/solitaire_server/e2e/scripts/cycle_metrics.js @@ -0,0 +1,377 @@ +#!/usr/bin/env node + +const fs = require("fs"); +const path = require("path"); +const { spawn } = require("child_process"); +const { chromium } = require("playwright"); + +const serverPort = 4173; +const baseUrl = `http://127.0.0.1:${serverPort}`; +const repoRoot = path.resolve(__dirname, "../../.."); + +function readArg(name, fallback) { + const idx = process.argv.indexOf(name); + if (idx < 0 || idx + 1 >= process.argv.length) return fallback; + return process.argv[idx + 1]; +} + +function parsePositiveInt(name, fallback) { + const value = Number.parseInt(readArg(name, String(fallback)), 10); + if (Number.isNaN(value) || value <= 0) return fallback; + return value; +} + +function parseOptionalNumber(name) { + const raw = readArg(name, null); + if (raw === null) return null; + const value = Number.parseFloat(raw); + return Number.isFinite(value) ? 
value : null; +} + +function hasFlag(name) { + return process.argv.includes(name); +} + +function buildSummary(results) { + return { + games_played: results.length, + won: results.filter((r) => r.terminal === "won").length, + no_moves: results.filter((r) => r.terminal === "no_moves").length, + cycle: results.filter((r) => r.terminal === "cycle").length, + step_budget: results.filter((r) => r.terminal === "step_budget").length, + apply_failed: results.filter((r) => r.terminal === "apply_failed").length, + invariant_failed: results.filter((r) => r.terminal === "invariant_failed").length, + missing_snapshot: results.filter((r) => r.terminal === "missing_snapshot").length, + games_with_issues: results.filter((r) => r.issues.length > 0).length, + with_page_errors: results.filter((r) => r.page_errors > 0).length, + with_console_errors: results.filter((r) => r.console_errors > 0).length, + draw1: results.filter((r) => !r.draw3).length, + draw3: results.filter((r) => r.draw3).length, + }; +} + +function withRates(results, predicate) { + const subset = results.filter(predicate); + const won = subset.filter((r) => r.terminal === "won").length; + const cycle = subset.filter((r) => r.terminal === "cycle").length; + const stepBudget = subset.filter((r) => r.terminal === "step_budget").length; + return { + games: subset.length, + won, + win_rate_pct: subset.length === 0 ? 0 : Number(((won / subset.length) * 100).toFixed(1)), + cycle, + cycle_rate_pct: + subset.length === 0 ? 0 : Number(((cycle / subset.length) * 100).toFixed(1)), + step_budget: stepBudget, + step_budget_rate_pct: + subset.length === 0 ? 
0 : Number(((stepBudget / subset.length) * 100).toFixed(1)),
+  };
+}
+
+async function waitForHealth(timeoutMs = 120_000) {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    try {
+      const res = await fetch(`${baseUrl}/health`);
+      if (res.ok) return true;
+    } catch {
+      // server not ready yet
+    }
+    await new Promise((resolve) => setTimeout(resolve, 400));
+  }
+  return false;
+}
+
+function startServer() {
+  return spawn("cargo", ["run", "-p", "solitaire_server", "--quiet"], {
+    cwd: repoRoot,
+    env: {
+      ...process.env,
+      SQLX_OFFLINE: "true",
+      DATABASE_URL: "sqlite://ferrous-solitaire-e2e.db",
+      JWT_SECRET: "ferrous_solitaire_e2e_secret_32_chars_long",
+      SERVER_PORT: String(serverPort),
+    },
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+}
+
+async function main() {
+  const games = parsePositiveInt("--games", 1000);
+  const maxSteps = parsePositiveInt("--steps", 350);
+  const maxVisitsPerState = parsePositiveInt("--max-visits", 2);
+  const policyArg = readArg("--policy", "loop_aware");
+  const policy = policyArg === "baseline" ? "baseline" : "loop_aware";
+  const outPath = readArg("--out", "/tmp/playwright-cycle-metrics.json");
+  const maxCycleRateAll = parseOptionalNumber("--max-cycle-rate-all");
+  const maxCycleRateDraw1 = parseOptionalNumber("--max-cycle-rate-draw1");
+  const maxCycleRateDraw3 = parseOptionalNumber("--max-cycle-rate-draw3");
+  const maxStepBudgetRateAll = parseOptionalNumber("--max-step-budget-rate-all");
+  const maxStepBudgetRateDraw1 = parseOptionalNumber("--max-step-budget-rate-draw1");
+  const maxStepBudgetRateDraw3 = parseOptionalNumber("--max-step-budget-rate-draw3");
+  const minWinRateAll = parseOptionalNumber("--min-win-rate-all");
+  const requireZeroIssues = hasFlag("--require-zero-issues");
+
+  const server = startServer();
+  let serverLog = "";
+  server.stdout.on("data", (d) => {
+    serverLog += d.toString();
+  });
+  server.stderr.on("data", (d) => {
+    serverLog += d.toString();
+  });
+
+  const cleanup = async () => {
+    if (server.exitCode === null && server.signalCode === null) { // still running; `killed` only means a signal was sent
+      server.kill("SIGTERM");
+      await new Promise((resolve) => setTimeout(resolve, 500));
+      if (server.exitCode === null && server.signalCode === null) server.kill("SIGKILL"); // SIGTERM was ignored
+    }
+  };
+
+  try {
+    if (!(await waitForHealth())) {
+      throw new Error("server_failed_health_check");
+    }
+
+    const browser = await chromium.launch({ headless: true });
+    const context = await browser.newContext();
+    const page = await context.newPage();
+    const results = [];
+
+    for (let i = 0; i < games; i++) {
+      const seed = i;
+      const draw3 = i % 2 === 1;
+      const suffix = draw3 ?
"&draw3=" : ""; + + const pageErrors = []; + const consoleErrors = []; + page.removeAllListeners("pageerror"); + page.removeAllListeners("console"); + page.on("pageerror", (err) => pageErrors.push(String(err))); + page.on("console", (msg) => { + if (msg.type() === "error") { + consoleErrors.push(msg.text()); + } + }); + + await page.goto(`${baseUrl}/play-classic?seed=${seed}${suffix}`, { + waitUntil: "domcontentloaded", + }); + + const resumeVisible = await page + .locator("#resume-overlay:not(.hidden)") + .isVisible() + .catch(() => false); + if (resumeVisible) { + await page.evaluate(() => localStorage.removeItem("fs_game_save")); + await page.reload({ waitUntil: "domcontentloaded" }); + } + + await page.waitForFunction( + () => + typeof window.__FERROUS_DEBUG__ === "object" && + window.__FERROUS_DEBUG__.seed() !== null, + null, + { timeout: 30_000 } + ); + + const run = await page.evaluate(({ stepCap, policyName, maxVisits }) => { + const debug = window.__FERROUS_DEBUG__; + const run = debug.runAutoplay({ + maxSteps: stepCap, + maxVisitsPerState: maxVisits, + policy: policyName, + }); + const snap = run.snapshot || debug.snapshot(); + const payload = debug.replayPayload(); + + return { + terminal: run.terminal || "unknown", + step: run.step ?? -1, + error: run.error || run.reason || null, + ok: !!run.ok, + score: snap?.state?.score ?? null, + move_count: snap?.state?.move_count ?? null, + invariant_ok: !!snap?.invariants?.state_ok, + history_len: Array.isArray(snap?.move_history) ? snap.move_history.length : null, + replay_payload_present: payload !== null, + replay_moves_len: Array.isArray(payload?.moves) ? 
payload.moves.length : 0, + }; + }, { stepCap: maxSteps, policyName: policy, maxVisits: maxVisitsPerState }); + + const issues = []; + if (!run.ok) issues.push(`autoplay_failed:${run.error || run.terminal || "unknown"}`); + if (!run.invariant_ok) issues.push("invariant_not_ok"); + if (run.terminal === "invariant_failed") issues.push("invariant_failed_terminal"); + if (run.terminal === "missing_snapshot") issues.push("missing_snapshot_terminal"); + if (run.terminal === "apply_failed") issues.push(`apply_failed:${run.error}`); + if (run.history_len > 0 && !run.replay_payload_present) { + issues.push("missing_replay_payload_after_moves"); + } + if ( + run.replay_payload_present && + run.history_len !== null && + run.replay_moves_len !== run.history_len + ) { + issues.push(`replay_history_mismatch:${run.replay_moves_len}/${run.history_len}`); + } + if (pageErrors.length > 0) issues.push(`page_errors:${pageErrors.length}`); + if (consoleErrors.length > 0) issues.push(`console_errors:${consoleErrors.length}`); + + results.push({ + game_index: i, + seed, + draw3, + ...run, + page_errors: pageErrors.length, + console_errors: consoleErrors.length, + issues, + }); + + if ((i + 1) % 100 === 0) { + console.error(`progress ${i + 1}/${games}`); + } + } + + const summary = buildSummary(results); + const rates = { + all: withRates(results, () => true), + draw1: withRates(results, (r) => !r.draw3), + draw3: withRates(results, (r) => r.draw3), + }; + const regressionFailures = []; + if (maxCycleRateAll !== null && rates.all.cycle_rate_pct > maxCycleRateAll) { + regressionFailures.push( + `all.cycle_rate_pct ${rates.all.cycle_rate_pct}% > ${maxCycleRateAll}%` + ); + } + if (maxCycleRateDraw1 !== null && rates.draw1.cycle_rate_pct > maxCycleRateDraw1) { + regressionFailures.push( + `draw1.cycle_rate_pct ${rates.draw1.cycle_rate_pct}% > ${maxCycleRateDraw1}%` + ); + } + if (maxCycleRateDraw3 !== null && rates.draw3.cycle_rate_pct > maxCycleRateDraw3) { + regressionFailures.push( + 
`draw3.cycle_rate_pct ${rates.draw3.cycle_rate_pct}% > ${maxCycleRateDraw3}%` + ); + } + if (minWinRateAll !== null && rates.all.win_rate_pct < minWinRateAll) { + regressionFailures.push( + `all.win_rate_pct ${rates.all.win_rate_pct}% < ${minWinRateAll}%` + ); + } + if (maxStepBudgetRateAll !== null && rates.all.step_budget_rate_pct > maxStepBudgetRateAll) { + regressionFailures.push( + `all.step_budget_rate_pct ${rates.all.step_budget_rate_pct}% > ${maxStepBudgetRateAll}%` + ); + } + if ( + maxStepBudgetRateDraw1 !== null && + rates.draw1.step_budget_rate_pct > maxStepBudgetRateDraw1 + ) { + regressionFailures.push( + `draw1.step_budget_rate_pct ${rates.draw1.step_budget_rate_pct}% > ${maxStepBudgetRateDraw1}%` + ); + } + if ( + maxStepBudgetRateDraw3 !== null && + rates.draw3.step_budget_rate_pct > maxStepBudgetRateDraw3 + ) { + regressionFailures.push( + `draw3.step_budget_rate_pct ${rates.draw3.step_budget_rate_pct}% > ${maxStepBudgetRateDraw3}%` + ); + } + if (requireZeroIssues) { + if (summary.games_with_issues > 0) { + regressionFailures.push(`games_with_issues ${summary.games_with_issues} > 0`); + } + if (summary.apply_failed > 0) { + regressionFailures.push(`apply_failed ${summary.apply_failed} > 0`); + } + if (summary.invariant_failed > 0) { + regressionFailures.push(`invariant_failed ${summary.invariant_failed} > 0`); + } + if (summary.missing_snapshot > 0) { + regressionFailures.push(`missing_snapshot ${summary.missing_snapshot} > 0`); + } + if (summary.with_page_errors > 0) { + regressionFailures.push(`with_page_errors ${summary.with_page_errors} > 0`); + } + if (summary.with_console_errors > 0) { + regressionFailures.push(`with_console_errors ${summary.with_console_errors} > 0`); + } + } + + const out = { + config: { + games, + max_steps: maxSteps, + policy, + max_visits_per_state: maxVisitsPerState, + }, + summary, + rates, + regression: { + pass: regressionFailures.length === 0, + thresholds: { + max_cycle_rate_all: maxCycleRateAll, + 
max_cycle_rate_draw1: maxCycleRateDraw1, + max_cycle_rate_draw3: maxCycleRateDraw3, + max_step_budget_rate_all: maxStepBudgetRateAll, + max_step_budget_rate_draw1: maxStepBudgetRateDraw1, + max_step_budget_rate_draw3: maxStepBudgetRateDraw3, + min_win_rate_all: minWinRateAll, + require_zero_issues: requireZeroIssues, + }, + failures: regressionFailures, + }, + issue_samples: results + .filter((r) => r.issues.length > 0) + .slice(0, 25) + .map((r) => ({ + seed: r.seed, + draw3: r.draw3, + terminal: r.terminal, + issues: r.issues, + score: r.score, + move_count: r.move_count, + })), + cycle_samples: results + .filter((r) => r.terminal === "cycle") + .slice(0, 25) + .map((r) => ({ + seed: r.seed, + draw3: r.draw3, + step: r.step, + score: r.score, + move_count: r.move_count, + })), + }; + + fs.mkdirSync(path.dirname(outPath), { recursive: true }); + fs.writeFileSync(outPath, JSON.stringify({ out, results }, null, 2)); + console.log(JSON.stringify(out, null, 2)); + if (regressionFailures.length > 0) { + console.error(`regression check failed: ${regressionFailures.join("; ")}`); + await browser.close(); + await cleanup(); + process.exit(2); + } + + await browser.close(); + await cleanup(); + } catch (err) { + const errorOut = { + error: String(err), + server_log_tail: serverLog.slice(-5000), + }; + fs.mkdirSync(path.dirname(outPath), { recursive: true }); + fs.writeFileSync(outPath, JSON.stringify(errorOut, null, 2)); + console.error(JSON.stringify(errorOut, null, 2)); + await cleanup(); + process.exit(1); + } +} + +main(); diff --git a/solitaire_server/e2e/tests/gameplay_review.spec.js b/solitaire_server/e2e/tests/gameplay_review.spec.js new file mode 100644 index 0000000..5aa8e8c --- /dev/null +++ b/solitaire_server/e2e/tests/gameplay_review.spec.js @@ -0,0 +1,93 @@ +const { test, expect } = require("@playwright/test"); + +async function gotoReadyGame(page, seed = 42) { + await page.goto(`/play-classic?seed=${seed}`); + const resumeOverlay = 
page.locator("#resume-overlay:not(.hidden)"); + if (await resumeOverlay.isVisible().catch(() => false)) { + await page.evaluate(() => localStorage.removeItem("fs_game_save")); + await page.reload(); + } + await page.waitForFunction( + () => + typeof window.__FERROUS_DEBUG__ === "object" && + window.__FERROUS_DEBUG__.seed() !== null + ); +} + +test("hud and core controls render for gameplay", async ({ page }) => { + await gotoReadyGame(page, 42); + + await expect(page.locator("#hud-score")).toHaveText(/Score:\s*\d+/); + await expect(page.locator("#hud-moves")).toHaveText(/Moves:\s*\d+/); + await expect(page.locator("#hud-timer")).toHaveText(/\d+:\d{2}/); + await expect(page.locator("#hud-stock")).toHaveText(/Stock:\s*\d+/); + + await expect(page.locator("#btn-undo")).toBeVisible(); + await expect(page.locator("#btn-new")).toBeVisible(); + await expect(page.locator("#chk-draw3")).toBeVisible(); + await expect(page.locator("#btn-theme")).toBeVisible(); + await expect(page.locator("#board")).toBeVisible(); + await expect(page.locator("#card-area .slot[data-pile='stock']")).toBeVisible(); +}); + +test("stock click + undo button behaves like player flow", async ({ page }) => { + await gotoReadyGame(page, 42); + + const baselineHistoryLen = await page.evaluate( + () => window.__FERROUS_DEBUG__.moveHistory().length + ); + + const stockBox = await page.locator("#card-area .slot[data-pile='stock']").boundingBox(); + expect(stockBox).not.toBeNull(); + await page.mouse.click( + stockBox.x + stockBox.width / 2, + stockBox.y + stockBox.height / 2 + ); + await expect + .poll(async () => await page.evaluate(() => window.__FERROUS_DEBUG__.moveHistory().length)) + .toBe(baselineHistoryLen + 1); + + await page.locator("#btn-undo").click(); + await expect + .poll(async () => await page.evaluate(() => window.__FERROUS_DEBUG__.moveHistory().length)) + .toBe(baselineHistoryLen); +}); + +test("draw-mode toggle affects replay payload draw_mode", async ({ page }) => { + await 
gotoReadyGame(page, 123);
+
+  await page.locator("#chk-draw3").check();
+  await expect(page.locator("#chk-draw3")).toBeChecked();
+
+  const applyResult = await page.evaluate(() =>
+    window.__FERROUS_DEBUG__.applyMove({ kind: "stock_click" })
+  );
+  expect(applyResult?.ok).toBeTruthy();
+  await expect
+    .poll(async () => await page.evaluate(() => window.__FERROUS_DEBUG__.replayPayload() !== null))
+    .toBe(true);
+
+  const payload = await page.evaluate(() => window.__FERROUS_DEBUG__.replayPayload());
+  expect(payload.draw_mode).toBe("DrawThree");
+  expect(payload.schema_version).toBe(2);
+  expect(Array.isArray(payload.moves)).toBeTruthy();
+  expect(payload.moves.length).toBeGreaterThan(0);
+});
+
+test("autonomous play keeps invariants stable across seed batch", async ({ page }) => {
+  test.setTimeout(120_000);
+  const seeds = [0, 1, 2, 3, 4, 5, 7, 11, 13, 17, 23, 29, 31, 42, 77, 99];
+
+  for (const seed of seeds) {
+    await gotoReadyGame(page, seed);
+    const run = await page.evaluate(() =>
+      window.__FERROUS_DEBUG__.runAutoplay({
+        maxSteps: 220,
+        maxVisitsPerState: 2,
+        policy: "loop_aware",
+      })
+    );
+
+    expect(run.ok, `seed ${seed} failed: ${JSON.stringify(run)}`).toBeTruthy();
+  }
+});
diff --git a/solitaire_server/e2e/tests/smoke.spec.js b/solitaire_server/e2e/tests/smoke.spec.js
new file mode 100644
index 0000000..f09eb20
--- /dev/null
+++ b/solitaire_server/e2e/tests/smoke.spec.js
@@ -0,0 +1,66 @@
+const { test, expect } = require("@playwright/test");
+
+test("play-classic loads and exposes debug bridge", async ({ page }) => {
+  await page.goto("/play-classic?seed=42");
+  await page.waitForFunction(() => typeof window.__FERROUS_DEBUG__ === "object" && window.__FERROUS_DEBUG__.seed() !== null); // game ready, not just bridge present
+
+  const seed = await page.evaluate(() => window.__FERROUS_DEBUG__.seed());
+  expect(seed).toBe(42);
+
+  const legalMoves = await page.evaluate(() => window.__FERROUS_DEBUG__.legalMoves());
+  expect(Array.isArray(legalMoves)).toBeTruthy();
+});
+
+test("keyboard parity: Space draws and U undoes", async ({ page }) => {
+  await page.goto("/play-classic?seed=42");
+  await page.waitForFunction(
+    () =>
+      typeof window.__FERROUS_DEBUG__ === "object" &&
+      window.__FERROUS_DEBUG__.seed() !== null
+  );
+
+  const baselineHistoryLen = await page.evaluate(
+    () => window.__FERROUS_DEBUG__.moveHistory().length
+  );
+
+  await page.keyboard.press("Space");
+  await expect
+    .poll(async () => await page.evaluate(() => window.__FERROUS_DEBUG__.moveHistory().length))
+    .toBe(baselineHistoryLen + 1);
+
+  await page.keyboard.press("KeyU");
+  await expect
+    .poll(async () => await page.evaluate(() => window.__FERROUS_DEBUG__.moveHistory().length))
+    .toBe(baselineHistoryLen);
+});
+
+test("debug failure report contains replay diagnostics", async ({ page }) => {
+  await page.goto("/play-classic?seed=42");
+  await page.waitForFunction(() => typeof window.__FERROUS_DEBUG__ === "object" && window.__FERROUS_DEBUG__.seed() !== null);
+
+  const report = await page.evaluate(() => window.__FERROUS_DEBUG__.failureReport());
+  expect(report).not.toBeNull();
+  expect(typeof report.seed).toBe("number");
+  expect(Array.isArray(report.moveHistory)).toBeTruthy();
+  expect(Array.isArray(report.legalMoves)).toBeTruthy();
+  expect(report.currentState).toBeTruthy();
+  expect(report.invariants).toBeTruthy();
+});
+
+test("replay payload builder exports schema-v2 moves", async ({ page }) => {
+  await page.goto("/play-classic?seed=42");
+  await page.waitForFunction(() => typeof window.__FERROUS_DEBUG__ === "object" && window.__FERROUS_DEBUG__.seed() !== null);
+
+  await page.keyboard.press("Space");
+
+  await expect
+    .poll(async () => await page.evaluate(() => window.__FERROUS_DEBUG__.replayPayload() !== null))
+    .toBe(true);
+  const payload = await page.evaluate(() => window.__FERROUS_DEBUG__.replayPayload());
+  expect(payload.schema_version).toBe(2);
+  expect(payload.draw_mode).toMatch(/Draw(One|Three)/);
+  expect(payload.mode).toBe("Classic");
+  expect(Array.isArray(payload.moves)).toBeTruthy();
+  expect(payload.moves.length).toBeGreaterThan(0);
+  expect(payload.win_move_index).toBe(payload.moves.length - 1);
+});