From f39df18ed73dfba9ee416d84ae3462ab59068dc3 Mon Sep 17 00:00:00 2001 From: eliterdav09-creator <236034507+eliterdav09-creator@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:58:28 +0700 Subject: [PATCH 1/4] Add baseline fetchall guard --- scripts/baselines/fetchall_existing.txt | 182 ++++++++++++++++++++++++ scripts/check_fetchall.sh | 98 ++++++------- tests/test_fetchall_guard.py | 53 +++++++ 3 files changed, 275 insertions(+), 58 deletions(-) create mode 100644 scripts/baselines/fetchall_existing.txt create mode 100644 tests/test_fetchall_guard.py diff --git a/scripts/baselines/fetchall_existing.txt b/scripts/baselines/fetchall_existing.txt new file mode 100644 index 000000000..754355da3 --- /dev/null +++ b/scripts/baselines/fetchall_existing.txt @@ -0,0 +1,182 @@ +node/utxo_genesis_migration.py:91: ).fetchall() +node/utxo_genesis_migration.py:104: ).fetchall() +node/utxo_db.py:379: ).fetchall() +node/utxo_db.py:940: ).fetchall() +node/utxo_db.py:1281: ).fetchall() +node/utxo_db.py:1346: ).fetchall() +node/utxo_db.py:1420: ).fetchall() +node/sophia_governor_review_service.py:540: ).fetchall() +node/sophia_governor_review_service.py:568: ).fetchall() +node/sophia_governor_review_service.py:640: ).fetchall() +node/sophia_governor_inbox.py:535: ).fetchall() +node/sophia_governor_inbox.py:894: rows = conn.execute(query, params).fetchall() +node/sophia_governor_inbox.py:968: ).fetchall() +node/sophia_governor_inbox.py:975: ).fetchall() +node/sophia_governor_inbox.py:982: ).fetchall() +node/sophia_governor.py:910: ).fetchall() +node/sophia_governor.py:951: ).fetchall() +node/sophia_elya_service.py:60: columns = conn.execute("PRAGMA table_info(balances)").fetchall() +node/sophia_elya_service.py:97: columns = {row[1] for row in conn.execute("PRAGMA table_info(epoch_state)").fetchall()} +node/sophia_attestation_inspector.py:413: ).fetchall() +node/sophia_attestation_inspector.py:576: ).fetchall() +node/sophia_attestation_inspector.py:679: ).fetchall() +node/slashing_penalties.py:433: return tuple(row[1] for row in conn.execute(f'PRAGMA table_info("{table_name}")').fetchall()) +node/rustchain_x402.py:45: existing = {row[1] for row in cursor.fetchall()} +node/rustchain_x402.py:81: columns = {row[1] for row in conn.execute("PRAGMA table_info(balances)").fetchall()} +node/rustchain_v2_integrated_v2.2.1_rip200.py:1316: columns = conn.execute("PRAGMA table_info(epoch_enroll)").fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:1323: rows = conn.execute("SELECT epoch, miner_pk, weight FROM epoch_enroll").fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:1478: """, (limit, offset)).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:1525: _epoch_state_cols = {row[1] for row in c.execute("PRAGMA table_info(epoch_state)").fetchall()} +node/rustchain_v2_integrated_v2.2.1_rip200.py:2667: return {row[1] for row in conn.execute(f"PRAGMA table_info({table_name})").fetchall()} +node/rustchain_v2_integrated_v2.2.1_rip200.py:2814: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:3015: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:3030: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:3670: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:4833: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:5253: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:6015: ).fetchall(): +node/rustchain_v2_integrated_v2.2.1_rip200.py:6024: ).fetchall(): +node/rustchain_v2_integrated_v2.2.1_rip200.py:6747: WHERE active=1 ORDER BY signer_id""").fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:6777: cols = {r[1] for r in cur.execute("PRAGMA table_info(balances)").fetchall()} +node/rustchain_v2_integrated_v2.2.1_rip200.py:6973: for row in c.fetchall(): +node/rustchain_v2_integrated_v2.2.1_rip200.py:7160: rows = conn.execute(f"PRAGMA table_info({table_name})").fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:7258: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:7277: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:7668: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:7701: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:7732: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:8011: for r in c.execute("PRAGMA table_info(balances)").fetchall(): +node/rustchain_v2_integrated_v2.2.1_rip200.py:8467: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:8612: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:8659: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:8684: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:9084: """, (now,)).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:9187: """).fetchall()) +node/rustchain_v2_integrated_v2.2.1_rip200.py:9192: """).fetchall()) +node/rustchain_v2_integrated_v2.2.1_rip200.py:9199: """).fetchall()) +node/rustchain_v2_integrated_v2.2.1_rip200.py:9360: ).fetchall() +node/rustchain_v2_integrated_v2.2.1_rip200.py:9719: return {row[1] for row in c.execute("PRAGMA table_info(balances)").fetchall()} +node/rustchain_tx_handler.py:133: columns = [col[1] for col in cursor.fetchall()] +node/rustchain_tx_handler.py:187: tables = {row[0] for row in cursor.fetchall()} +node/rustchain_tx_handler.py:210: columns = [col[1] for col in cursor.fetchall()] +node/rustchain_tx_handler.py:368: return {row["nonce"] for row in cursor.fetchall()} +node/rustchain_tx_handler.py:451: pending_nonces = {row["nonce"] for row in cursor.fetchall()} +node/rustchain_tx_handler.py:545: for row in cursor.fetchall() +node/rustchain_tx_handler.py:910: transactions = [dict(row) for row in cursor.fetchall()] +node/rustchain_sync.py:93: rows = conn.execute(f"PRAGMA table_info({table_name})").fetchall() +node/rustchain_sync.py:139: rows = cursor.fetchall() +node/rustchain_sync.py:179: data = [dict(row) for row in cursor.fetchall()] +node/rustchain_p2p_sync_secure.py:414: return [row[0] for row in cursor.fetchall()] +node/rustchain_p2p_sync_secure.py:543: rows = cursor.fetchall() +node/rustchain_p2p_sync.py:186: """, (cutoff, fresh_n)).fetchall() +node/rustchain_p2p_sync.py:198: """, (cutoff, cap)).fetchall() +node/rustchain_p2p_sync.py:576: """, (start, limit)).fetchall() +node/rustchain_p2p_gossip.py:675: """).fetchall() +node/rustchain_p2p_gossip.py:687: """).fetchall() +node/rustchain_p2p_gossip.py:694: """).fetchall() +node/rustchain_p2p_gossip.py:1172: attested_miners = {row[0] for row in cursor.fetchall()} +node/rustchain_migration.py:123: tables = [row[0] for row in cursor.fetchall()] +node/rustchain_migration.py:360: attestations = cursor.fetchall() +node/rustchain_migration.py:493: metadata = dict(cursor.fetchall()) +node/rustchain_ergo_anchor.py:472: for row in cursor.fetchall(): +node/rustchain_ergo_anchor.py:561: anchors = [dict(row) for row in cursor.fetchall()] +node/rustchain_dashboard.py:504: """, (epoch_data['epoch'],)).fetchall() +node/rustchain_dashboard.py:557: """).fetchall() +node/rustchain_block_producer.py:314: for row in cursor.fetchall(): +node/rustchain_block_producer.py:485: for row in cursor.fetchall(): +node/rustchain_block_producer.py:516: for row in cursor.fetchall() +node/rustchain_block_producer.py:884: return {row[1] for row in conn.execute(f"PRAGMA table_info({table})").fetchall()} +node/rustchain_block_producer.py:1068: ).fetchall() +node/rustchain_block_producer.py:1088: ).fetchall() +node/rustchain_bft_consensus.py:234: ).fetchall() +node/rom_clustering_server.py:85: duplicate_keys = [(row[0], row[1]) for row in cur.fetchall()] +node/rom_clustering_server.py:95: rows = cur.fetchall() +node/rom_clustering_server.py:235: other_miners = [row[0] for row in cur.fetchall()] +node/rom_clustering_server.py:317: for row in cur.fetchall(): +node/rom_clustering_server.py:345: for row in cur.fetchall(): +node/rip_node_sync.py:63: return set(row[0] for row in cursor.fetchall()) +node/rip_200_round_robin_1cpu1vote_v2.py:275: for row in cursor.fetchall(): +node/rip_200_round_robin_1cpu1vote_v2.py:321: epoch_miners = cursor.fetchall() +node/rip_200_round_robin_1cpu1vote.py:502: return cursor.fetchall() +node/rip_200_round_robin_1cpu1vote.py:632: cols = cursor.execute("PRAGMA table_info(miner_attest_recent)").fetchall() +node/rip_200_round_robin_1cpu1vote.py:643: enrolled = cursor.fetchall() +node/rip_200_round_robin_1cpu1vote.py:704: epoch_miners = cursor.fetchall() +node/rip0202_evidence.py:124: rows = conn.execute(sql, params).fetchall() +node/rewards_implementation_rip200.py:362: ).fetchall() +node/proposer_duty_calendar.py:95: ).fetchall() +node/payout_worker.py:47: """, (limit,)).fetchall() +node/payout_worker.py:109: """).fetchall() +node/payout_worker.py:302: """).fetchall() +node/payout_worker.py:400: """, (cutoff,)).fetchall() +node/migrate_machine_passport.py:49: result['tables'] = [row[0] for row in cursor.fetchall()] +node/machine_passport.py:450: """, params).fetchall() +node/machine_passport.py:495: """, (machine_id,)).fetchall() +node/machine_passport.py:529: """, (machine_id,)).fetchall() +node/machine_passport.py:567: """, (machine_id,)).fetchall() +node/machine_passport.py:601: """, (machine_id,)).fetchall() +node/lock_ledger.py:480: rows = cursor.execute(query, params).fetchall() +node/lock_ledger.py:538: rows = cursor.execute(query, params).fetchall() +node/lock_ledger.py:587: """, (miner_id,)).fetchall() +node/hardware_fingerprint_replay.py:266: recent_submissions = c.fetchall() +node/hardware_fingerprint_replay.py:347: collisions = c.fetchall() +node/hardware_fingerprint_replay.py:554: history = c.fetchall() +node/hardware_binding_v2.py:193: for row in c.fetchall(): +node/hall_of_rust.py:326: rows = c.fetchall() +node/hall_of_rust.py:576: rows = c.fetchall() +node/hall_of_rust.py:656: for r in c.fetchall() +node/hall_of_rust.py:678: for r in c.fetchall() +node/hall_of_rust.py:827: for row in c.fetchall(): +node/hall_of_rust.py:870: for row in c.fetchall(): +node/gpu_render_protocol.py:152: cols = {row[1] for row in conn.execute("PRAGMA table_info(render_escrow)").fetchall()} +node/gpu_render_protocol.py:272: rows = conn.execute(query, params).fetchall() +node/gpu_render_protocol.py:427: ).fetchall() +node/gpu_render_endpoints.py:64: cols = {row[1] for row in db.execute("PRAGMA table_info(render_escrow)").fetchall()} +node/governance.py:280: ).fetchall() +node/governance.py:511: ).fetchall() +node/governance.py:516: ).fetchall() +node/governance.py:546: ).fetchall() +node/ergo_raw_tx.py:77: miners = [dict(row) for row in cur.fetchall()] +node/ergo_miner_anchor.py:38: miners = [dict(row) for row in cur.fetchall()] +node/coalition.py:322: ).fetchall() +node/coalition.py:874: ).fetchall() +node/coalition.py:879: ).fetchall() +node/coalition.py:915: ).fetchall() +node/coalition.py:960: ).fetchall() +node/coalition.py:966: ).fetchall() +node/claims_submission.py:116: columns = {row[1] for row in cursor.fetchall()} +node/claims_submission.py:640: for row in cursor.fetchall(): +node/claims_settlement.py:86: for row in cursor.fetchall(): +node/claims_settlement.py:131: for row in cursor.fetchall(): +node/claims_settlement.py:416: """, (max_claims,)).fetchall() +node/claims_settlement.py:443: """, (batch_id, max_claims)).fetchall() +node/claims_eligibility.py:675: epochs = [row[0] for row in cursor.fetchall() if row[0] >= 0] +node/bridge_api.py:547: rows = cursor.execute(query, params).fetchall() +node/bottube_feed_routes.py:128: rows = cursor_obj.fetchall() +node/beacon_x402.py:72: for row in cursor.fetchall()} +node/beacon_x402.py:341: ).fetchall() +node/beacon_x402.py:369: ).fetchall() +node/beacon_x402.py:410: ).fetchall() +node/beacon_identity.py:125: ).fetchall() +node/beacon_identity.py:259: ).fetchall() +node/beacon_api.py:383: ).fetchall() +node/beacon_api.py:412: ).fetchall() +node/beacon_api.py:678: ).fetchall() +node/beacon_api.py:684: ).fetchall() +node/beacon_api.py:726: ).fetchall() +node/beacon_api.py:955: ).fetchall() +node/beacon_api.py:1212: rows = db.execute("SELECT * FROM beacon_reputation ORDER BY score DESC").fetchall() +node/beacon_anchor.py:79: for row in conn.execute("PRAGMA table_info(beacon_envelopes)").fetchall() +node/beacon_anchor.py:236: ).fetchall() +node/beacon_anchor.py:305: ).fetchall() +node/bcos_routes.py:544: rows = conn.execute(query, params).fetchall() +node/anti_double_mining.py:160: enrolled = cursor.fetchall() +node/anti_double_mining.py:210: rows = cursor.fetchall() +node/anti_double_mining.py:333: rows = cursor.fetchall() +node/anti_double_mining.py:367: enrolled = cursor.fetchall() +node/anti_double_mining.py:407: rows = cursor.fetchall() +node/anti_double_mining.py:439: cols = conn.execute("PRAGMA table_info(epoch_enroll)").fetchall() +node/anti_double_mining.py:450: ).fetchall() +node/airdrop_v2.py:1142: rows = cursor.fetchall() +node/airdrop_v2.py:1193: rows = cursor.fetchall() +node/airdrop_v2.py:1226: for row in cursor.fetchall() +node/airdrop_v2.py:1238: for row in cursor.fetchall() diff --git a/scripts/check_fetchall.sh b/scripts/check_fetchall.sh index 419441aeb..a8ebc504b 100755 --- a/scripts/check_fetchall.sh +++ b/scripts/check_fetchall.sh @@ -1,39 +1,18 @@ #!/usr/bin/env bash # check_fetchall.sh — CI guard against unbounded .fetchall() in node code. # -# Background: issue #6627. The project shipped 6 [UTXO-BUG] fixes in one -# week, all the same shape: an unbounded .fetchall() on a public/semi-public -# endpoint, materializing attacker-influenced row counts into a Python list, -# exhausting node memory. The architectural fix is node/db_helpers.py -# (fetch_page / fetch_one_or_none). This script makes the fix structural by -# refusing to land new raw .fetchall() calls in node/ without an opt-in -# annotation justifying why bounded materialization is safe at that site. -# -# Opt-in annotation: -# # fetchall-ok: -# on the same line as .fetchall() OR on the immediately preceding line. -# -# Valid reasons: -# bounded-by-schema — query selects from a table whose row count is -# bounded by the schema (e.g. one row per epoch, -# one row per known fingerprint check). -# pragma-result — PRAGMA table_info / index_list / etc.; SQLite -# caps the row count by schema metadata. -# internal-test-helper — test-only path, no attacker influence. -# already-paginated — caller's SQL has its own bound, kept for clarity -# (only use this for grandfathered code being -# audited in a follow-up sweep). -# -# Usage: bash scripts/check_fetchall.sh -# Exit: 0 if every hit is annotated or migrated, 1 otherwise. +# This check supports a migration baseline: existing raw .fetchall() sites are +# listed in scripts/baselines/fetchall_existing.txt so CI can prevent new +# unannotated sites while the large legacy backlog is converted incrementally. set -u ROOT="$(cd "$(dirname "$0")/.." && pwd)" cd "$ROOT" -# Prefer ripgrep when available — much faster on this 10k-line file — -# fall back to grep so the script also runs in a minimal CI image. +BASELINE_FILE="${FETCHALL_BASELINE:-scripts/baselines/fetchall_existing.txt}" +VALID_REASONS_RE='bounded-by-schema|pragma-result|internal-test-helper|already-paginated' + if command -v rg >/dev/null 2>&1; then MATCHES="$(rg -n '\.fetchall\(\)' node \ --glob '!node/tests/**' \ @@ -50,37 +29,32 @@ else --exclude='db_helpers.py' 2>/dev/null || true)" fi -# Filter docstring / comment / string-literal matches: only treat lines -# whose first non-whitespace .fetchall() occurrence is preceded by an -# unbalanced quote pair as code. The cheap-but-good-enough heuristic: if -# a line contains a triple-quote OR has more than one " or ' before the -# .fetchall() and no `=` / `(` / `.` immediately before it as code, drop -# it. We keep the simpler approach: ignore lines that look like rST -# literal-rendered backticks (``.fetchall()``) which is how the helper -# module documents the bug class. MATCHES="$(echo "$MATCHES" | grep -v '\`\`\.fetchall()' || true)" -VALID_REASONS_RE='bounded-by-schema|pragma-result|internal-test-helper|already-paginated' +baseline_tmp="$(mktemp)" +if [ -f "$BASELINE_FILE" ]; then + sed '/^$/d' "$BASELINE_FILE" | sort -u > "$baseline_tmp" +else + : > "$baseline_tmp" +fi -unannotated_count=0 -unannotated_list="" +unannotated_tmp="$(mktemp)" +new_tmp="$(mktemp)" +stale_tmp="$(mktemp)" +trap 'rm -f "$baseline_tmp" "$unannotated_tmp" "$new_tmp" "$stale_tmp"' EXIT -# IFS reset so we iterate line-by-line, not whitespace-by-whitespace. while IFS= read -r hit; do [ -z "$hit" ] && continue - # rg/grep emit `path:lineno:content` — split that. file="${hit%%:*}" rest="${hit#*:}" lineno="${rest%%:*}" content="${rest#*:}" - # 1) Same-line annotation? if echo "$content" | grep -qE "#\s*fetchall-ok:\s*($VALID_REASONS_RE)"; then continue fi - # 2) Prior-line annotation? Look at lineno-1. prior=$(( lineno - 1 )) if [ "$prior" -ge 1 ] && [ -f "$file" ]; then prior_line=$(sed -n "${prior}p" "$file") @@ -89,29 +63,37 @@ while IFS= read -r hit; do fi fi - unannotated_count=$(( unannotated_count + 1 )) - unannotated_list="${unannotated_list}${file}:${lineno}:${content} -" + echo "$hit" >> "$unannotated_tmp" done <<< "$MATCHES" -if [ "$unannotated_count" -gt 0 ]; then - echo "ERROR: $unannotated_count unannotated .fetchall() call(s) in node/ — these" - echo "are candidates for the UTXO-OOM bug class (issue #6627)." +sort -u "$unannotated_tmp" -o "$unannotated_tmp" +comm -23 "$unannotated_tmp" "$baseline_tmp" > "$new_tmp" +comm -13 "$unannotated_tmp" "$baseline_tmp" > "$stale_tmp" + +if [ -s "$new_tmp" ]; then + count=$(wc -l < "$new_tmp" | tr -d ' ') + echo "ERROR: $count new unannotated .fetchall() call(s) in node/." + echo "These are candidates for the UTXO-OOM bug class (issue #6627)." echo "" echo "Fix options:" - echo " 1) Migrate to node.db_helpers.fetch_page() — bounded, safe." - echo " 2) If bounded materialization is genuinely safe at that site," - echo " add an annotation comment:" + echo " 1) Migrate to node.db_helpers.fetch_page() / fetch_one_or_none()." + echo " 2) If bounded materialization is genuinely safe, add:" echo " # fetchall-ok: " - echo " on the same line or the preceding line. Valid reasons:" - echo " bounded-by-schema, pragma-result, internal-test-helper," - echo " already-paginated" + echo " Valid reasons: bounded-by-schema, pragma-result, internal-test-helper, already-paginated" echo "" - echo "Unannotated hits:" - echo "$unannotated_list" | sed 's/^/ /' + echo "New unannotated hits:" + sed 's/^/ /' "$new_tmp" + exit 1 +fi + +if [ -s "$stale_tmp" ]; then + echo "ERROR: fetchall baseline has stale entries." + echo "Remove these from $BASELINE_FILE or regenerate the baseline:" + sed 's/^/ /' "$stale_tmp" exit 1 fi -echo "OK: every .fetchall() in node/ is either migrated to fetch_page() or" -echo "annotated with a valid reason. (issue #6627)" +legacy_count=$(wc -l < "$unannotated_tmp" | tr -d ' ') +echo "OK: no new unannotated .fetchall() calls in node/." +echo "Legacy baseline count: $legacy_count (issue #6627 migration backlog)." exit 0 diff --git a/tests/test_fetchall_guard.py b/tests/test_fetchall_guard.py new file mode 100644 index 000000000..23a50731c --- /dev/null +++ b/tests/test_fetchall_guard.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import subprocess +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SCRIPT = ROOT / "scripts" / "check_fetchall.sh" +TMP_VIOLATION = ROOT / "node" / "_tmp_fetchall_guard_violation.py" + + +def run_guard() -> subprocess.CompletedProcess[str]: + return subprocess.run( + ["bash", str(SCRIPT)], + cwd=ROOT, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + check=False, + ) + + +def test_fetchall_guard_passes_current_baseline(): + result = run_guard() + assert result.returncode == 0, result.stdout + assert "no new unannotated .fetchall()" in result.stdout + + +def test_fetchall_guard_blocks_new_unannotated_call(): + try: + TMP_VIOLATION.write_text( + "def leak(conn):\n" + " return conn.execute('SELECT * FROM attacker_controlled').fetchall()\n" + ) + result = run_guard() + assert result.returncode == 1 + assert "new unannotated .fetchall()" in result.stdout + assert "_tmp_fetchall_guard_violation.py" in result.stdout + finally: + TMP_VIOLATION.unlink(missing_ok=True) + + +def test_fetchall_guard_allows_annotated_call(): + try: + TMP_VIOLATION.write_text( + "def schema_bounded(conn):\n" + " # fetchall-ok: pragma-result\n" + " return conn.execute('PRAGMA table_info(example)').fetchall()\n" + ) + result = run_guard() + assert result.returncode == 0, result.stdout + finally: + TMP_VIOLATION.unlink(missing_ok=True) From 75a091e38269cddd9e2a78ef8a9f99968176c037 Mon Sep 17 00:00:00 2001 From: eliterdav09-creator <236034507+eliterdav09-creator@users.noreply.github.com> Date: Thu, 4 Jun 2026 16:38:02 +0700 Subject: [PATCH 2/4] fix: make fetchall guard fail closed --- scripts/check_fetchall.sh | 68 +++++++++++++++++++++++++++--------- tests/test_fetchall_guard.py | 15 ++++++++ 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/scripts/check_fetchall.sh b/scripts/check_fetchall.sh index a8ebc504b..9236aa36d 100755 --- a/scripts/check_fetchall.sh +++ b/scripts/check_fetchall.sh @@ -5,46 +5,82 @@ # listed in scripts/baselines/fetchall_existing.txt so CI can prevent new # unannotated sites while the large legacy backlog is converted incrementally. -set -u - -ROOT="$(cd "$(dirname "$0")/.." && pwd)" +set -euo pipefail + +SCRIPT_PATH="${BASH_SOURCE[0]}" +case "$SCRIPT_PATH" in + */*) SCRIPT_DIR="${SCRIPT_PATH%/*}" ;; + *) SCRIPT_DIR="." ;; +esac +SCRIPT_DIR="$(cd -- "$SCRIPT_DIR" && pwd)" +ROOT="$(cd -- "$SCRIPT_DIR/.." && pwd)" cd "$ROOT" BASELINE_FILE="${FETCHALL_BASELINE:-scripts/baselines/fetchall_existing.txt}" VALID_REASONS_RE='bounded-by-schema|pragma-result|internal-test-helper|already-paginated' +require_cmd() { + if ! command -v "$1" >/dev/null 2>&1; then + echo "ERROR: required command '$1' is not available" >&2 + exit 2 + fi +} + +for cmd in grep sed sort comm mktemp wc tr; do + require_cmd "$cmd" +done + +scan_tmp="$(mktemp)" +baseline_tmp="$(mktemp)" +unannotated_tmp="$(mktemp)" +new_tmp="$(mktemp)" +stale_tmp="$(mktemp)" +trap 'rm -f "$scan_tmp" "$baseline_tmp" "$unannotated_tmp" "$new_tmp" "$stale_tmp"' EXIT + +: > "$scan_tmp" +: > "$unannotated_tmp" + if command -v rg >/dev/null 2>&1; then - MATCHES="$(rg -n '\.fetchall\(\)' node \ + set +e + rg -n '\.fetchall\(\)' node \ --glob '!node/tests/**' \ --glob '!node/test_*' \ --glob '!node/__pycache__/**' \ --glob '!node/db_helpers.py' \ - --glob '!deprecated/**' || true)" + --glob '!deprecated/**' > "$scan_tmp" + scan_status=$? + set -e + if [ "$scan_status" -ne 0 ] && [ "$scan_status" -ne 1 ]; then + echo "ERROR: rg scan failed with status $scan_status" >&2 + exit 2 + fi else - MATCHES="$(grep -rn '\.fetchall()' node \ + set +e + grep -rn '\.fetchall()' node \ --include='*.py' \ --exclude-dir=tests \ --exclude-dir=__pycache__ \ --exclude='test_*' \ - --exclude='db_helpers.py' 2>/dev/null || true)" + --exclude='db_helpers.py' > "$scan_tmp" + scan_status=$? + set -e + if [ "$scan_status" -ne 0 ] && [ "$scan_status" -ne 1 ]; then + echo "ERROR: grep scan failed with status $scan_status" >&2 + exit 2 + fi fi -MATCHES="$(echo "$MATCHES" | grep -v '\`\`\.fetchall()' || true)" - -baseline_tmp="$(mktemp)" if [ -f "$BASELINE_FILE" ]; then sed '/^$/d' "$BASELINE_FILE" | sort -u > "$baseline_tmp" else : > "$baseline_tmp" fi -unannotated_tmp="$(mktemp)" -new_tmp="$(mktemp)" -stale_tmp="$(mktemp)" -trap 'rm -f "$baseline_tmp" "$unannotated_tmp" "$new_tmp" "$stale_tmp"' EXIT - while IFS= read -r hit; do [ -z "$hit" ] && continue + if echo "$hit" | grep -q '\`\`\.fetchall()'; then + continue + fi file="${hit%%:*}" rest="${hit#*:}" @@ -64,7 +100,7 @@ while IFS= read -r hit; do fi echo "$hit" >> "$unannotated_tmp" -done <<< "$MATCHES" +done < "$scan_tmp" sort -u "$unannotated_tmp" -o "$unannotated_tmp" comm -23 "$unannotated_tmp" "$baseline_tmp" > "$new_tmp" diff --git a/tests/test_fetchall_guard.py b/tests/test_fetchall_guard.py index 23a50731c..7750e2244 100644 --- a/tests/test_fetchall_guard.py +++ b/tests/test_fetchall_guard.py @@ -51,3 +51,18 @@ def test_fetchall_guard_allows_annotated_call(): assert result.returncode == 0, result.stdout finally: TMP_VIOLATION.unlink(missing_ok=True) + + +def test_fetchall_guard_fails_closed_when_required_tools_are_missing(): + result = subprocess.run( + ["/bin/bash", str(SCRIPT)], + cwd=ROOT, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + check=False, + env={"PATH": "/nonexistent"}, + ) + + assert result.returncode == 2 + assert "required command" in result.stdout From 0aed18955e3203a22398ec644d335ff7ce9723fa Mon Sep 17 00:00:00 2001 From: eliterdav09-creator <236034507+eliterdav09-creator@users.noreply.github.com> Date: Thu, 4 Jun 2026 16:40:36 +0700 Subject: [PATCH 3/4] chore: add SPDX headers to fetchall guard files --- scripts/check_fetchall.sh | 1 + tests/test_fetchall_guard.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/check_fetchall.sh b/scripts/check_fetchall.sh index 9236aa36d..88dc12d87 100755 --- a/scripts/check_fetchall.sh +++ b/scripts/check_fetchall.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# SPDX-License-Identifier: MIT # check_fetchall.sh — CI guard against unbounded .fetchall() in node code. # # This check supports a migration baseline: existing raw .fetchall() sites are diff --git a/tests/test_fetchall_guard.py b/tests/test_fetchall_guard.py index 7750e2244..80d8bc81a 100644 --- a/tests/test_fetchall_guard.py +++ b/tests/test_fetchall_guard.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: MIT from __future__ import annotations import subprocess From e121f950d7a6ab9eb5092b751636dbea53413718 Mon Sep 17 00:00:00 2001 From: eliterdav09-creator <236034507+eliterdav09-creator@users.noreply.github.com> Date: Thu, 4 Jun 2026 20:06:54 +0700 Subject: [PATCH 4/4] test: cover stale fetchall baseline guard --- scripts/baselines/fetchall_existing.txt | 3 ++ scripts/check_fetchall.sh | 17 +++++++--- tests/test_fetchall_guard.py | 42 +++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/scripts/baselines/fetchall_existing.txt b/scripts/baselines/fetchall_existing.txt index 754355da3..d256ac77f 100644 --- a/scripts/baselines/fetchall_existing.txt +++ b/scripts/baselines/fetchall_existing.txt @@ -1,3 +1,6 @@ +# Existing unannotated .fetchall() migration baseline for issue #6627. +# Expected to shrink as legacy callers migrate to node.db_helpers.fetch_page() +# or receive a narrow fetchall-ok annotation. Do not add new entries manually. node/utxo_genesis_migration.py:91: ).fetchall() node/utxo_genesis_migration.py:104: ).fetchall() node/utxo_db.py:379: ).fetchall() diff --git a/scripts/check_fetchall.sh b/scripts/check_fetchall.sh index 88dc12d87..d9ba63ccd 100755 --- a/scripts/check_fetchall.sh +++ b/scripts/check_fetchall.sh @@ -41,9 +41,11 @@ trap 'rm -f "$scan_tmp" "$baseline_tmp" "$unannotated_tmp" "$new_tmp" "$stale_tm : > "$scan_tmp" : > "$unannotated_tmp" +FETCHALL_PATTERN='\.fetchall[[:space:]]*\(' + if command -v rg >/dev/null 2>&1; then set +e - rg -n '\.fetchall\(\)' node \ + rg -n "$FETCHALL_PATTERN" node \ --glob '!node/tests/**' \ --glob '!node/test_*' \ --glob '!node/__pycache__/**' \ @@ -57,7 +59,7 @@ if command -v rg >/dev/null 2>&1; then fi else set +e - grep -rn '\.fetchall()' node \ + grep -rnE "$FETCHALL_PATTERN" node \ --include='*.py' \ --exclude-dir=tests \ --exclude-dir=__pycache__ \ @@ -72,7 +74,7 @@ else fi if [ -f "$BASELINE_FILE" ]; then - sed '/^$/d' "$BASELINE_FILE" | sort -u > "$baseline_tmp" + grep -vE '^($|#)' "$BASELINE_FILE" | sort -u > "$baseline_tmp" else : > "$baseline_tmp" fi @@ -104,6 +106,12 @@ while IFS= read -r hit; do done < "$scan_tmp" sort -u "$unannotated_tmp" -o "$unannotated_tmp" + +if [ "${1:-}" = "--print-baseline" ]; then + cat "$unannotated_tmp" + exit 0 +fi + comm -23 "$unannotated_tmp" "$baseline_tmp" > "$new_tmp" comm -13 "$unannotated_tmp" "$baseline_tmp" > "$stale_tmp" @@ -125,7 +133,8 @@ fi if [ -s "$stale_tmp" ]; then echo "ERROR: fetchall baseline has stale entries." - echo "Remove these from $BASELINE_FILE or regenerate the baseline:" + echo "Remove these from $BASELINE_FILE or regenerate the baseline with:" + echo " bash scripts/check_fetchall.sh --print-baseline > $BASELINE_FILE" sed 's/^/ /' "$stale_tmp" exit 1 fi diff --git a/tests/test_fetchall_guard.py b/tests/test_fetchall_guard.py index 80d8bc81a..b91d6b7b9 100644 --- a/tests/test_fetchall_guard.py +++ b/tests/test_fetchall_guard.py @@ -8,6 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] SCRIPT = ROOT / "scripts" / "check_fetchall.sh" TMP_VIOLATION = ROOT / "node" / "_tmp_fetchall_guard_violation.py" +TMP_BASELINE = ROOT / "scripts" / "baselines" / "_tmp_fetchall_stale_baseline.txt" def run_guard() -> subprocess.CompletedProcess[str]: @@ -41,6 +42,20 @@ def test_fetchall_guard_blocks_new_unannotated_call(): TMP_VIOLATION.unlink(missing_ok=True) +def test_fetchall_guard_blocks_whitespace_before_call_parens(): + try: + TMP_VIOLATION.write_text( + "def leak(conn):\n" + " return conn.execute('SELECT * FROM attacker_controlled').fetchall ()\n" + ) + result = run_guard() + assert result.returncode == 1 + assert "new unannotated .fetchall()" in result.stdout + assert "_tmp_fetchall_guard_violation.py" in result.stdout + finally: + TMP_VIOLATION.unlink(missing_ok=True) + + def test_fetchall_guard_allows_annotated_call(): try: TMP_VIOLATION.write_text( @@ -67,3 +82,30 @@ def test_fetchall_guard_fails_closed_when_required_tools_are_missing(): assert result.returncode == 2 assert "required command" in result.stdout + + +def test_fetchall_guard_detects_stale_baseline_entries(): + try: + current = subprocess.run( + ["bash", str(SCRIPT), "--print-baseline"], + cwd=ROOT, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + check=True, + ).stdout + TMP_BASELINE.write_text(current + "node/phantom.py:1:cursor.fetchall()\n") + result = subprocess.run( + ["bash", str(SCRIPT)], + cwd=ROOT, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + check=False, + env={"PATH": "/usr/bin:/bin", "FETCHALL_BASELINE": str(TMP_BASELINE)}, + ) + assert result.returncode == 1 + assert "stale entries" in result.stdout + assert "node/phantom.py" in result.stdout + finally: + TMP_BASELINE.unlink(missing_ok=True)