# test_benchmarks.R — Performance regression tests with timing floors
#
# These tests assert minimum throughput so performance regressions are caught.
# Floors are set conservatively — any modern machine should clear them easily.
#
# Each benchmark follows the same shape: snapshot proc.time(), run the
# workload, take the wall-clock ("elapsed") difference, print the measured
# rate, and assert that the rate exceeds the floor.

context("Benchmarks — Performance Regression Guards")

# File-level skips: wall-clock floors are not asserted on CI or CRAN.
skip_on_ci()
skip_on_cran()

# Directory that PGN fixture paths are resolved against; falls back to the
# current working directory when TAL_PROJECT_ROOT is not set.
root <- Sys.getenv("TAL_PROJECT_ROOT", unset = ".")

# ---------------------------------------------------------------------------
# 1. FEN parse + serialize round-trip
# ---------------------------------------------------------------------------

# Times n calls of cpp_parse_fen() followed by cpp_state_to_fen() on one FEN.
test_that("FEN parse+serialize > 50,000 ops/sec", {
  fen <- "rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq e3 0 1"
  n <- 5000L

  t0 <- proc.time()
  for (i in seq_len(n)) {
    state <- cpp_parse_fen(fen)
    cpp_state_to_fen(state)
  }
  # `[[` drops the "elapsed" name so derived values are plain scalars.
  elapsed <- (proc.time() - t0)[["elapsed"]]

  rate <- n / elapsed
  cat(sprintf("\n FEN round-trip: %.0f ops/sec (%.3fs for %d ops)\n",
              rate, elapsed, n))
  expect_gt(rate, 50000)
})

# ---------------------------------------------------------------------------
# 2. Legal move generation throughput
# ---------------------------------------------------------------------------

# Times cpp_generate_legal_moves() over three pre-parsed positions; parsing
# happens before the clock starts so only move generation is measured.
test_that("legal move generation > 50,000 positions/sec", {
  fens <- c(
    "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1",              # opening: 20 moves
    "r1bqkb1r/pppp1ppp/2n2n2/4p3/2B1P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 4 4",  # middlegame
    "8/5k2/8/8/3Q4/8/8/4K3 w - - 0 1"                                       # endgame: Q vs K
  )
  n_per <- 2000L
  n_total <- n_per * length(fens)

  states <- lapply(fens, cpp_parse_fen)

  t0 <- proc.time()
  for (i in seq_len(n_per)) {
    for (s in states) {
      cpp_generate_legal_moves(s)
    }
  }
  elapsed <- (proc.time() - t0)[["elapsed"]]

  rate <- n_total / elapsed
  cat(sprintf("\n Legal move gen: %.0f positions/sec (%.3fs for %d)\n",
              rate, elapsed, n_total))
  expect_gt(rate, 50000)
})

# ---------------------------------------------------------------------------
# 3. Single game replay speed
# ---------------------------------------------------------------------------

# Times n_reps calls of cpp_replay_game() on one fixed 30-ply move list.
test_that("single game replay > 20,000 plies/sec", {
  # Italian Game: 1.e4 e5 2.Nf3 Nc6 3.Bc4 Bc5 4.d3 Nf6 5.Nc3 d6
  moves <- c("e4", "e5", "Nf3", "Nc6", "Bc4", "Bc5", "d3", "Nf6", "Nc3", "d6",
             "Bg5", "h6", "Bh4", "g5", "Bg3", "Nh5", "Nd5", "Nf4", "Bxf4", "gxf4",
             "c3", "Be6", "Qb3", "Bxd5", "Bxd5", "Qd7", "O-O", "O-O-O", "a4", "Kb8")
  n_reps <- 200L
  n_plies <- length(moves) * n_reps

  t0 <- proc.time()
  for (i in seq_len(n_reps)) {
    cpp_replay_game(moves)
  }
  elapsed <- (proc.time() - t0)[["elapsed"]]

  rate <- n_plies / elapsed
  cat(sprintf("\n Single game replay: %.0f plies/sec (%.3fs for %d plies)\n",
              rate, elapsed, n_plies))
  expect_gt(rate, 20000)
})

# ---------------------------------------------------------------------------
# 4. Batch replay throughput (Tal.pgn — 2,431 games, 173K plies)
# ---------------------------------------------------------------------------

# Times a single replay_all_games() call on Tal.pgn; skipped when the file is
# absent. The ply count is taken from the number of rows returned.
test_that("batch replay of Tal.pgn > 30,000 plies/sec", {
  pgn <- file.path(root, "pgnmentor", "Tal.pgn")
  skip_if_not(file.exists(pgn), "Tal.pgn not found")

  t0 <- proc.time()
  positions <- replay_all_games(pgn)
  elapsed <- (proc.time() - t0)[["elapsed"]]

  n_plies <- nrow(positions)
  rate <- n_plies / elapsed
  cat(sprintf("\n Tal.pgn batch replay: %.0f plies/sec (%s plies in %.2fs)\n",
              rate, format(n_plies, big.mark = ","), elapsed))
  expect_gt(rate, 30000)
  # No row may be flagged as not ok.
  expect_equal(sum(!positions$ok), 0L)
})

# ---------------------------------------------------------------------------
# 5. Batch enrichment throughput (cpp_enrich_batch)
# ---------------------------------------------------------------------------

# Times one cpp_enrich_batch() call on up to 5,000 ok rows from Tal.pgn. The
# replay that produces the sample runs before the clock starts.
test_that("batch enrichment > 5,000 positions/sec", {
  pgn <- file.path(root, "pgnmentor", "Tal.pgn")
  skip_if_not(file.exists(pgn), "Tal.pgn not found")

  # Use first 5,000 Tal positions
  positions <- replay_all_games(pgn)
  sample_pos <- head(positions[positions$ok, ], 5000)
  # head() may return fewer than 5,000 rows, so report the real sample size.
  n_pos <- nrow(sample_pos)

  t0 <- proc.time()
  cpp_enrich_batch(sample_pos$fen, sample_pos$uci_move)
  elapsed <- (proc.time() - t0)[["elapsed"]]

  rate <- n_pos / elapsed
  cat(sprintf("\n Batch enrichment: %.0f positions/sec (%d in %.2fs)\n",
              rate, n_pos, elapsed))
  expect_gt(rate, 5000)
})

# ---------------------------------------------------------------------------
# 6. Perft — correctness via known node counts (also measures speed)
# ---------------------------------------------------------------------------

# Counts legal moves at depths 1-3 from the starting position and compares
# them with the published totals. Depth 3 is timed and reported, but no
# throughput floor is asserted here.
test_that("perft depth 1-3 matches published node counts", {
  # Starting position: https://www.chessprogramming.org/Perft_Results
  state <- cpp_parse_fen("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")

  # Depth 1: 20 nodes
  moves_d1 <- cpp_generate_legal_moves(state)
  expect_equal(length(moves_d1), 20L)

  # Depth 2: 400 nodes (each of 20 moves leads to 20 replies)
  count_d2 <- 0L
  for (m in moves_d1) {
    s2 <- cpp_apply_ply(state, m)
    count_d2 <- count_d2 + length(cpp_generate_legal_moves(s2))
  }
  expect_equal(count_d2, 400L)

  # Depth 3: 8,902 nodes
  t0 <- proc.time()
  count_d3 <- 0L
  for (m1 in moves_d1) {
    s2 <- cpp_apply_ply(state, m1)
    moves_d2 <- cpp_generate_legal_moves(s2)
    for (m2 in moves_d2) {
      s3 <- cpp_apply_ply(s2, m2)
      count_d3 <- count_d3 + length(cpp_generate_legal_moves(s3))
    }
  }
  elapsed <- (proc.time() - t0)[["elapsed"]]
  cat(sprintf("\n Perft(3): %d nodes in %.3fs (%.0f nodes/sec)\n",
              count_d3, elapsed, count_d3 / elapsed))
  expect_equal(count_d3, 8902L)
})

# Depth-1 move count for a second reference position.
test_that("perft depth 1 for Kiwipete position = 48", {
  # Kiwipete: complex position with castling, en passant, promotions
  state <- cpp_parse_fen("r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 1")
  moves <- cpp_generate_legal_moves(state)
  expect_equal(length(moves), 48L)
})

# ---------------------------------------------------------------------------
# 7. Multi-PGN stress test — replay a large tournament file
# ---------------------------------------------------------------------------

# Times replay_all_games() on Gibraltar2019.pgn (skipped when absent) and
# asserts both an error-rate ceiling and a throughput floor.
test_that("large tournament PGN replays without errors", {
  pgn <- file.path(root, "pgnmentor", "Gibraltar2019.pgn")
  skip_if_not(file.exists(pgn), "Gibraltar2019.pgn not found")

  t0 <- proc.time()
  positions <- replay_all_games(pgn)
  elapsed <- (proc.time() - t0)[["elapsed"]]

  n_plies <- nrow(positions)
  n_err <- sum(!positions$ok)
  rate <- n_plies / elapsed

  cat(sprintf("\n Gibraltar2019: %s plies, %d errors, %.0f plies/sec (%.2fs)\n",
              format(n_plies, big.mark = ","), n_err, rate, elapsed))

  # Allow some errors (PGN quality varies) but must be < 1%
  error_pct <- 100 * n_err / n_plies
  expect_lt(error_pct, 1.0)
  expect_gt(rate, 20000)
})