diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
new file mode 100644
index 0000000..d6fcd88
--- /dev/null
+++ b/.github/workflows/benchmark.yaml
@@ -0,0 +1,138 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/refs/heads/master/src/schemas/json/github-workflow.json
+name: Benchmark PR
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+
+jobs:
+  bench:
+    name: Run benchmark & compare against base
+    runs-on: ubuntu-24.04
+    if: ${{ github.repository_owner == 'query-doctor' }}
+    timeout-minutes: 45
+    permissions:
+      contents: read
+      pull-requests: write
+    env:
+      # Manual (workflow_dispatch) runs carry no pull_request payload, so fall
+      # back to the dispatched commit and the repository's default branch.
+      HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+      BASE_REF: ${{ github.base_ref || github.event.repository.default_branch }}
+    steps:
+      - name: Checkout PR (full history)
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.sha || github.sha }}
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version: 24
+          cache: npm
+
+      - name: Resolve base commit
+        id: base
+        run: |
+          # No --depth here: a depth-limited fetch can convert the full clone
+          # into a shallow one, hiding the fork point from merge-base.
+          git fetch origin "$BASE_REF"
+          echo "sha=$(git merge-base "origin/$BASE_REF" HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Install dependencies (PR)
+        run: npm ci
+
+      - name: Run benchmark on PR
+        run: npx vitest bench --run --outputJson=/tmp/bench-pr.json
+
+      - name: Preserve PR bench source
+        # Base may predate this file or its API; we run base's source against
+        # the PR's bench definition so we're measuring code changes, not bench changes.
+        run: cp src/remote/optimizer.bench.ts /tmp/optimizer.bench.ts
+
+      - name: Switch to base commit
+        run: git checkout -f ${{ steps.base.outputs.sha }}
+
+      - name: Restore PR bench file onto base source
+        run: |
+          mkdir -p src/remote
+          cp /tmp/optimizer.bench.ts src/remote/optimizer.bench.ts
+
+      - name: Install dependencies (base)
+        id: base-install
+        continue-on-error: true
+        run: npm ci
+
+      - name: Run benchmark on base
+        id: base-bench
+        if: steps.base-install.outcome == 'success'
+        continue-on-error: true
+        run: npx vitest bench --run --outputJson=/tmp/bench-base.json
+
+      - name: Switch back to PR
+        if: always()
+        run: git checkout -f "$HEAD_SHA"
+
+      - name: Build PR comment
+        id: build
+        run: |
+          # compare-bench exits non-zero on regression; keep going so the
+          # exit status can be recorded as a step output instead.
+          set +e
+          if [ "${{ steps.base-bench.outcome }}" = "success" ] && [ -f /tmp/bench-base.json ]; then
+            node scripts/compare-bench.mjs /tmp/bench-base.json /tmp/bench-pr.json > bench-comparison.md
+            echo "regressed=$?" >> "$GITHUB_OUTPUT"
+            echo "" >> bench-comparison.md
+            echo "Base commit: \`${{ steps.base.outputs.sha }}\`" >> bench-comparison.md
+          else
+            node scripts/compare-bench.mjs /tmp/bench-pr.json > bench-comparison.md
+            echo "regressed=0" >> "$GITHUB_OUTPUT"
+            echo "" >> bench-comparison.md
+            echo "_Base commit \`${{ steps.base.outputs.sha }}\` benchmark did not run (missing bench, incompatible API, or failure)._" >> bench-comparison.md
+          fi
+
+      - name: Upload raw bench results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: bench-results
+          path: |
+            /tmp/bench-pr.json
+            /tmp/bench-base.json
+          if-no-files-found: ignore
+          retention-days: 30
+
+      - name: Post / update PR comment
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const body = fs.readFileSync('bench-comparison.md', 'utf8');
+            // Hidden HTML marker identifying this workflow's comment, so reruns
+            // update it in place and never touch anyone else's comment.
+            const marker = '<!-- benchmark-comparison -->';
+            const final = marker + '\n' + body;
+            const { owner, repo } = context.repo;
+            const issue_number = context.issue.number;
+            if (!issue_number) return; // workflow_dispatch has no PR
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner, repo, issue_number,
+            });
+            const existing = comments.find(c => c.body && c.body.startsWith(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner, repo, comment_id: existing.id, body: final,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner, repo, issue_number, body: final,
+              });
+            }
+
+      - name: Fail on regression
+        if: steps.build.outputs.regressed == '1'
+        run: |
+          echo "::error::Benchmark regression detected (>20% on at least one bench). See PR comment for details."
+          exit 1
diff --git a/package.json b/package.json
index 19bdab3..6649234 100644
--- a/package.json
+++ b/package.json
@@ -8,6 +8,7 @@
     "start:dev": "node --import tsx --watch src/main.ts",
     "dev": "node --env-file=.env --import tsx --watch src/main.ts",
     "test": "vitest",
+    "bench": "vitest bench",
     "typecheck": "tsc --noEmit",
     "build": "esbuild src/main.ts --bundle --platform=node --format=esm --outfile=dist/main.mjs --packages=external && cp src/reporters/github/success.md.j2 src/sync/schema_dump.sql dist/"
   },
diff --git a/scripts/compare-bench.mjs b/scripts/compare-bench.mjs
new file mode 100644
index 0000000..4f7e18f
--- /dev/null
+++ b/scripts/compare-bench.mjs
@@ -0,0 +1,155 @@
+#!/usr/bin/env node
+// Compare vitest --outputJson benchmark reports and emit a markdown summary.
+// Usage:
+//   node scripts/compare-bench.mjs <base.json> <pr.json> [--threshold=<pct>]   (diff mode)
+//   node scripts/compare-bench.mjs <pr.json>                                   (current-only mode)
+// Exit code 0 unless a benchmark regressed beyond --threshold (default 20%),
+// in which case it is 1. Usage errors exit with 2.
+
+import { readFileSync } from "node:fs";
+
+const args = process.argv.slice(2);
+const positional = args.filter((a) => !a.startsWith("--"));
+// Flags are "--key=value"; a bare "--key" is recorded as the string "true".
+const flags = Object.fromEntries(
+  args
+    .filter((a) => a.startsWith("--"))
+    .map((a) => {
+      const [k, v] = a.slice(2).split("=");
+      return [k, v ?? "true"];
+    }),
+);
+
+if (positional.length < 1) {
+  console.error(
+    "usage: compare-bench.mjs <base.json> <pr.json> [--threshold=<pct>]\n" +
+      "       compare-bench.mjs <pr.json>",
+  );
+  process.exit(2);
+}
+
+const threshold = Number(flags.threshold ?? 20);
+// A non-numeric threshold would make every comparison false and silently
+// disable regression detection, so reject it up front.
+if (!Number.isFinite(threshold) || threshold < 0) {
+  console.error(`invalid --threshold value: ${flags.threshold}`);
+  process.exit(2);
+}
+const currentOnly = positional.length === 1;
+const [basePath, prPath] = currentOnly ? [null, positional[0]] : positional;
+
+const base = currentOnly ? { files: [] } : JSON.parse(readFileSync(basePath, "utf8"));
+const pr = JSON.parse(readFileSync(prPath, "utf8"));
+
+// Index every benchmark in a report by "<group fullName> > <benchmark name>".
+function flatten(report) {
+  const out = new Map();
+  for (const file of report.files ?? []) {
+    for (const group of file.groups ?? []) {
+      for (const b of group.benchmarks ?? []) {
+        out.set(`${group.fullName} > ${b.name}`, b);
+      }
+    }
+  }
+  return out;
+}
+
+const baseMap = flatten(base);
+const prMap = flatten(pr);
+
+// Format a duration in whole milliseconds, or an em dash when missing.
+function fmtMs(n) {
+  if (n == null || Number.isNaN(n)) return "—";
+  return `${Math.round(n).toLocaleString("en-US")}ms`;
+}
+
+// Format a signed percentage with one decimal, or an em dash when missing.
+function fmtPct(n) {
+  if (n == null || Number.isNaN(n)) return "—";
+  const sign = n > 0 ? "+" : "";
+  return `${sign}${n.toFixed(1)}%`;
+}
+
+// Map a delta to its status emoji; a missing delta means no comparison could
+// be computed (typically the benchmark is new or was removed).
+function verdict(deltaPct, threshold) {
+  if (deltaPct == null || Number.isNaN(deltaPct)) return "🆕";
+  if (deltaPct >= threshold) return "🔴";
+  if (deltaPct <= -threshold) return "🟢";
+  return "⚪";
+}
+
+const rows = [];
+let regressed = false;
+
+const allKeys = new Set([...baseMap.keys(), ...prMap.keys()]);
+for (const key of allKeys) {
+  const b = baseMap.get(key);
+  const p = prMap.get(key);
+  const baseMean = b?.mean;
+  const prMean = p?.mean;
+  // Percentage change of the PR mean relative to base; null when either
+  // side is missing or the base mean is not positive.
+  const deltaPct =
+    baseMean != null && prMean != null && baseMean > 0
+      ? ((prMean - baseMean) / baseMean) * 100
+      : null;
+  if (deltaPct != null && deltaPct >= threshold) regressed = true;
+
+  rows.push({
+    key,
+    baseMean,
+    prMean,
+    baseRme: b?.rme,
+    prRme: p?.rme,
+    deltaPct,
+    verdict: verdict(deltaPct, threshold),
+    onlyBase: !p,
+    onlyPr: !b,
+  });
+}
+
+rows.sort((a, b) => a.key.localeCompare(b.key));
+
+const lines = [];
+lines.push("### Benchmark comparison");
+lines.push("");
+
+if (currentOnly) {
+  lines.push("_No baseline available — showing PR results only._");
+  lines.push("");
+  lines.push("| Benchmark | Mean | RME | Samples |");
+  lines.push("|---|---:|---:|---:|");
+  for (const r of rows) {
+    const samples = prMap.get(r.key)?.sampleCount ?? "—";
+    const rme = r.prRme != null ? `±${r.prRme.toFixed(1)}%` : "—";
+    lines.push(`| \`${r.key}\` | ${fmtMs(r.prMean)} | ${rme} | ${samples} |`);
+  }
+} else {
+  lines.push(
+    `Threshold: ±${threshold}% on mean. 🔴 regression · 🟢 improvement · ⚪ within noise · 🆕 new/removed.`,
+  );
+  lines.push("");
+  lines.push("| | Benchmark | Base mean | PR mean | Δ | RME (base → PR) |");
+  lines.push("|---|---|---:|---:|---:|---|");
+  for (const r of rows) {
+    const rme =
+      r.baseRme != null && r.prRme != null
+        ? `±${r.baseRme.toFixed(1)}% → ±${r.prRme.toFixed(1)}%`
+        : "—";
+    lines.push(
+      `| ${r.verdict} | \`${r.key}\` | ${fmtMs(r.baseMean)} | ${fmtMs(r.prMean)} | ${fmtPct(r.deltaPct)} | ${rme} |`,
+    );
+  }
+}
+
+lines.push("");
+lines.push(
+  "_Benchmarks use testcontainers + wall-time; some noise is expected. Treat single-digit deltas as not-significant._",
+);
+
+process.stdout.write(lines.join("\n") + "\n");
+
+if (regressed) {
+  process.exitCode = 1;
+}
diff --git a/src/remote/optimizer.bench.ts b/src/remote/optimizer.bench.ts
new file mode 100644
index 0000000..bf049a0
--- /dev/null
+++ b/src/remote/optimizer.bench.ts
@@ -0,0 +1,283 @@
+import { bench, describe, beforeAll, afterAll } from "vitest";
+import { PostgreSqlContainer } from "@testcontainers/postgresql";
+import { Pool } from "pg";
+import { QueryOptimizer } from "./query-optimizer.ts";
+import { ConnectionManager } from "../sync/connection-manager.ts";
+import { Connectable } from "../sync/connectable.ts";
+import { RecentQuery, QueryHash } from "../sql/recent-query.ts";
+import type { StatisticsMode } from "@query-doctor/core";
+
+// Postgres server flags: preload pg_stat_statements and turn off autovacuum and
+// the track_* collectors listed below.
+const PG_COMMAND = [
+  "-c",
+  "shared_preload_libraries=pg_stat_statements",
+  "-c",
+  "autovacuum=off",
+  "-c",
+  "track_counts=off",
+  "-c",
+  "track_io_timing=off",
+  "-c",
+  "track_activities=off",
+];
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+type BenchContext = {
+  manager: ConnectionManager;
+  optimizer: QueryOptimizer;
+  queries: RecentQuery[];
+  stats: StatisticsMode;
+};
+
+// ---------------------------------------------------------------------------
+// Schema & query generators
+// ---------------------------------------------------------------------------
+
+/** Zero-padded table name for a 1-based index, e.g. 7 -> "t_007". */
+function tName(i: number): string {
+  return `t_${String(i).padStart(3, "0")}`;
+}
+
+/**
+ * Build CREATE TABLE / CREATE INDEX statements for `tableCount` tables.
+ * Every table after the first has a `ref_id` column; every third table gets a
+ * name index and every even-numbered table with `ref_id` gets a ref_id index.
+ */
+function generateDDL(tableCount: number): string {
+  const stmts: string[] = [];
+  for (let i = 1; i <= tableCount; i++) {
+    const t = tName(i);
+    const hasRef = i > 1;
+    stmts.push(`CREATE TABLE ${t} (
+      id serial PRIMARY KEY,${hasRef ? "\n      ref_id int," : ""}
+      name text,
+      value numeric(10,2),
+      status text,
+      active boolean DEFAULT true,
+      created_at timestamp DEFAULT now()
+    );`);
+    if (i % 3 === 0) stmts.push(`CREATE INDEX ${t}_name_idx ON ${t}(name);`);
+    if (hasRef && i % 2 === 0)
+      stmts.push(`CREATE INDEX ${t}_ref_idx ON ${t}(ref_id);`);
+  }
+  return stmts.join("\n");
+}
+
+/** Query templates; `ref` is the table joined against, or null when there is none. */
+const QUERY_PATTERNS: ((t: string, ref: string | null) => string)[] = [
+  (t) => `SELECT * FROM ${t} WHERE name = $1`,
+  (t) => `SELECT * FROM ${t} WHERE status = $1`,
+  (t) => `SELECT * FROM ${t} WHERE value > $1 ORDER BY value LIMIT 50`,
+  (t) => `SELECT * FROM ${t} ORDER BY created_at DESC LIMIT 50`,
+  (t) => `SELECT * FROM ${t} WHERE active = $1 AND status = $2`,
+  (t) => `SELECT status, COUNT(*) as cnt FROM ${t} GROUP BY status`,
+  (t, ref) =>
+    ref
+      ? `SELECT a.id, a.name, b.name as ref_name FROM ${t} a JOIN ${ref} b ON b.id = a.ref_id WHERE a.active = $1`
+      : `SELECT * FROM ${t} WHERE name = $1 AND value > $2`,
+];
+
+/**
+ * Produce exactly `queryCount` SQL strings by cycling over the tables first
+ * and moving to the next pattern after each full pass over all tables.
+ */
+function generateQueries(tableCount: number, queryCount: number): string[] {
+  const queries: string[] = [];
+  for (let q = 0; queries.length < queryCount; q++) {
+    const tableIdx = (q % tableCount) + 1;
+    const t = tName(tableIdx);
+    const ref = tableIdx > 1 ? tName(Math.ceil(tableIdx / 2)) : null;
+    const patternIdx = Math.floor(q / tableCount) % QUERY_PATTERNS.length;
+    queries.push(QUERY_PATTERNS[patternIdx](t, ref));
+  }
+  return queries.slice(0, queryCount);
+}
+
+/**
+ * Build an inline statistics export describing the tables and indexes that
+ * generateDDL creates, with synthetic row and page counts.
+ */
+function generateStats(
+  tableCount: number,
+): StatisticsMode {
+  const stats = [];
+  for (let i = 1; i <= tableCount; i++) {
+    const t = tName(i);
+    const hasRef = i > 1;
+    const reltuples = 100_000 + i * 1_000;
+    const columns = [
+      "id",
+      ...(hasRef ? ["ref_id"] : []),
+      "name",
+      "value",
+      "status",
+      "active",
+      "created_at",
+    ];
+    const indexes: { indexName: string; relpages: number; reltuples: number; relallvisible: number; amname: "btree"; fillfactor: number; columns: { attlen: null }[] }[] = [
+      {
+        indexName: `${t}_pkey`,
+        relpages: Math.ceil(reltuples / 500),
+        reltuples,
+        relallvisible: 1,
+        amname: "btree",
+        fillfactor: 0.9,
+        columns: [{ attlen: null }],
+      },
+    ];
+    if (i % 3 === 0)
+      indexes.push({
+        indexName: `${t}_name_idx`,
+        relpages: Math.ceil(reltuples / 500),
+        reltuples,
+        relallvisible: 1,
+        amname: "btree",
+        fillfactor: 0.9,
+        columns: [{ attlen: null }],
+      });
+    if (hasRef && i % 2 === 0)
+      indexes.push({
+        indexName: `${t}_ref_idx`,
+        relpages: Math.ceil(reltuples / 500),
+        reltuples,
+        relallvisible: 1,
+        amname: "btree",
+        fillfactor: 0.9,
+        columns: [{ attlen: null }],
+      });
+    stats.push({
+      tableName: t,
+      schemaName: "public",
+      relpages: Math.ceil(reltuples / 100),
+      reltuples,
+      relallvisible: 1,
+      columns: columns.map((c) => ({ columnName: c, stats: null, attlen: null })),
+      indexes,
+    });
+  }
+  return { kind: "fromStatisticsExport", source: { kind: "inline" }, stats };
+}
+
+// ---------------------------------------------------------------------------
+// Setup helpers
+// ---------------------------------------------------------------------------
+
+/** Analyze each raw SQL string into a RecentQuery, hashed as `bench_<index>`. */
+async function parseQueries(rawQueries: string[]): Promise<RecentQuery[]> {
+  const results: RecentQuery[] = [];
+  for (let i = 0; i < rawQueries.length; i++) {
+    const hash = QueryHash.parse(`bench_${i}`);
+    const query = await RecentQuery.analyze(
+      {
+        query: rawQueries[i],
+        formattedQuery: rawQueries[i],
+        username: "bench",
+        meanTime: 0,
+        calls: "1",
+        rows: "0",
+        topLevel: true,
+      },
+      hash,
+      Date.now(),
+    );
+    results.push(query);
+  }
+  return results;
+}
+
+/**
+ * Create database `dbName` on the server at `baseUrl`, load the generated
+ * schema into it, and return the optimizer plus its pre-parsed inputs.
+ */
+async function setupDatabase(
+  baseUrl: string,
+  dbName: string,
+  tableCount: number,
+  queryCount: number,
+): Promise<BenchContext> {
+  const adminPool = new Pool({ connectionString: baseUrl });
+  try {
+    await adminPool.query(`CREATE DATABASE ${dbName}`);
+  } finally {
+    // Release the pool even when the statement fails.
+    await adminPool.end();
+  }
+
+  // Swap the last path segment of the connection URI for the new database.
+  const dbUrl = baseUrl.replace(/\/[^/]*$/, `/${dbName}`);
+  const dbPool = new Pool({ connectionString: dbUrl });
+  try {
+    await dbPool.query(generateDDL(tableCount));
+  } finally {
+    await dbPool.end();
+  }
+
+  const manager = ConnectionManager.forLocalDatabase();
+  const conn = Connectable.fromString(dbUrl);
+  const optimizer = new QueryOptimizer(manager, conn);
+  const queries = await parseQueries(generateQueries(tableCount, queryCount));
+  const stats = generateStats(tableCount);
+
+  return { manager, optimizer, queries, stats };
+}
+
+// ---------------------------------------------------------------------------
+// Benchmarks
+// ---------------------------------------------------------------------------
+
+let container: Awaited<ReturnType<PostgreSqlContainer["start"]>>;
+let small: BenchContext;
+let medium: BenchContext;
+let large: BenchContext;
+
+beforeAll(async () => {
+  container = await new PostgreSqlContainer("postgres:17")
+    .withCommand(PG_COMMAND)
+    .start();
+
+  const baseUrl = container.getConnectionUri();
+  small = await setupDatabase(baseUrl, "bench_small", 3, 5);
+  medium = await setupDatabase(baseUrl, "bench_medium", 20, 100);
+  large = await setupDatabase(baseUrl, "bench_large", 300, 1000);
+}, 300_000);
+
+afterAll(async () => {
+  // Contexts may be unset if beforeAll failed part-way through.
+  for (const ctx of [small, medium, large]) {
+    if (ctx) {
+      ctx.optimizer.stop();
+      await ctx.manager.closeAll();
+    }
+  }
+  if (container) await container.stop();
+});
+
+describe("query optimizer", () => {
+  bench(
+    "small (3 tables, 5 queries)",
+    async () => {
+      await small.optimizer.start(small.queries, small.stats);
+    },
+    { iterations: 5, warmupIterations: 1, time: 0, warmupTime: 0 },
+  );
+
+  bench(
+    "medium (20 tables, 100 queries)",
+    async () => {
+      await medium.optimizer.start(medium.queries, medium.stats);
+    },
+    { iterations: 3, warmupIterations: 1, time: 0, warmupTime: 0 },
+  );
+
+  bench(
+    "large (300 tables, 1000 queries)",
+    async () => {
+      await large.optimizer.start(large.queries, large.stats);
+    },
+    { iterations: 3, warmupIterations: 1, time: 0, warmupTime: 0 },
+  );
+});
diff --git a/vitest.config.ts b/vitest.config.ts
index 6f7984b..0a724f5 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -4,5 +4,8 @@ export default defineConfig({
   test: {
     include: ["src/**/*.test.ts"],
     testTimeout: 120_000,
+    benchmark: {
+      include: ["src/**/*.bench.ts"],
+    },
   },
 });