-
Notifications
You must be signed in to change notification settings - Fork 472
Expand file tree
/
Copy pathrun_tests_parallel.sh
More file actions
executable file
·341 lines (311 loc) · 15.6 KB
/
run_tests_parallel.sh
File metadata and controls
executable file
·341 lines (311 loc) · 15.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
#!/bin/bash
# Local parallel test runner — mirror CI's parallel structure as closely as
# possible while dropping the cross-machine artifact-transfer complexity.
# Shard definitions and the shard-4 catch-all logic match
# .github/workflows/build_pull_request.yml exactly.
# Usage: ./run_tests_parallel.sh [--shards=4|6]
#
# ── CI step → local equivalent (how cross-machine machinery is replaced) ───
#   CI (multi-machine)                           Local (single machine)
#   ───────────────────────────────────────────  ──────────────────────────────
#   lint: check_test_isolation.py                same (run before tests; abort on fail)
#   compile job: mvn clean install -Pprod        pre-compile once: install obp-commons
#     + upload-artifact(target/)                 into shared ~/.m2 + test-compile
#   test job: download-artifact + touch +        obp-api into shared target/ — a
#     install-file(obp-commons, parentPom)       single machine shares ~/.m2 and
#                                                target/ natively, so no artifact
#                                                upload/download/touch is needed
#   each shard on its own VM → mvn test          two dynamic free ports per shard
#     (port/DB isolation for free)               (OBP_TESTS_PORT + OBP_HTTP4S_TEST_PORT)
#                                                + scalatest:test (see port block)
#   "Setup props" step writes                    missing critical props injected via
#     test.default.props                         OBP_* env vars (see run_shard)
#   report job: test_speed_report.py             run best-effort after all shards
#
# Notes:
# * scalatest:test is the correct local stand-in for CI's `mvn test`: CI can
# run the full Maven lifecycle safely because each shard has its own VM and
# its own target/. Locally the 4 processes share one target/, so we must
# pre-compile once and then run scalatest:test (tests only) — otherwise the
# shards race on copying resources into target/test-classes.
# * Do NOT use 6 shards: they contend over the single local DB connection pool
# and produce spurious failures.
# ── Output directory, JVM options, tool discovery and argument parsing ────
# All shard logs and the pre-compile log are written below this directory, so
# failing to create it is fatal rather than something to discover later.
if ! mkdir -p test-results/parallel; then
  echo "ERROR: cannot create test-results/parallel" >&2
  exit 1
fi
# JVM options handed to every Maven invocation through MAVEN_OPTS.
MVN_OPTS="-Xmx3G -Xss2m -XX:MaxMetaspaceSize=1G"
# Portable `timeout`: GNU coreutils ships it as `timeout` (Linux) but Homebrew on
# macOS installs it prefixed as `gtimeout`. Pick whichever exists.
if command -v timeout >/dev/null 2>&1; then
  TIMEOUT_BIN="timeout"
elif command -v gtimeout >/dev/null 2>&1; then
  TIMEOUT_BIN="gtimeout"
else
  echo "ERROR: neither 'timeout' nor 'gtimeout' found on PATH" >&2
  exit 1
fi
# Cross-checkout mutex: the obp-commons `mvn install` writes to the shared ~/.m2.
# Multiple checkouts starting this script simultaneously race on that write and can
# corrupt each other's JARs (torn ZipFile). We use an atomic mkdir lock to serialise
# ~/.m2 writes across processes. The lock is released immediately after the install
# and cleaned up on exit (including crashes) via the EXIT trap.
# NOTE(review): the trap installed here does not check who owns the lock — it
# removes the directory whenever this process exits.
OBC_LOCK="/tmp/obp-commons-m2-install.lock"
trap 'rm -rf "$OBC_LOCK"' EXIT
# Shard count: 4 by default, overridable with --shards=N. Only 4 and 6 have
# shard definitions; anything else used to fall through silently to the 4-shard
# layout, so now we say so explicitly (and still run 4 shards).
SHARDS=4
for arg in "$@"; do
  case "$arg" in
    --shards=*) SHARDS="${arg#*=}" ;;
    *) echo "WARNING: ignoring unknown argument '$arg'" >&2 ;;
  esac
done
case "$SHARDS" in
  4 | 6) ;;
  *)
    echo "WARNING: unsupported --shards value '$SHARDS' (expected 4 or 6); using 4" >&2
    SHARDS=4
    ;;
esac
# ── Dynamic free-port allocation ──────────────────────────────────────────
# Each shard is its own `mvn scalatest:test` JVM that binds TWO sockets:
#   tests.port       (OBP_TESTS_PORT, TestServer, default 8000)
#   http4s.test.port (OBP_HTTP4S_TEST_PORT, Http4sTestServer, default 8087)
# Hardcoded ports collide when several project checkouts run this script at the
# same time. The un-injected 8087 even collides WITHIN one run, because the
# suites that start Http4sTestServer are split across shards (v5_0_0 in shard 2;
# http4sbridge/v7 in shard 4) — both JVMs would bind the default 8087.
# So we pick random high free ports per shard. A fixed base + upward scan can't
# solve simultaneous launches: at fork time no shard has bound yet, so every
# concurrent run picks the same base. Random high range + lsof skip (catches
# ports other checkouts already bound) + in-run dedup avoids that.
PORT_MIN=20000     # lowest candidate port (inclusive)
PORT_MAX=55000     # upper bound of the candidate range (exclusive)
ASSIGNED_PORTS=()  # ports already handed out in THIS run (prevents shard clashes)
ALLOC_PORT=""      # alloc_free_port returns its result here (no subshell — see below)
# alloc_free_port: pick a random free port into the global ALLOC_PORT.
# Returns via a global, NOT stdout, so it must be called WITHOUT $(...): a command
# substitution runs in a subshell, and the ASSIGNED_PORTS append would be lost,
# breaking the in-run dedup. Call as: `alloc_free_port || exit 1; X=$ALLOC_PORT`.
# Globals:  PORT_MIN, PORT_MAX (read); ASSIGNED_PORTS, ALLOC_PORT (written)
# Returns:  0 with ALLOC_PORT set; 1 (message on stderr) if the range is empty or
#           no free port turned up within 500 random draws.
alloc_free_port() {
  local tries=0 span candidate
  span=$(( PORT_MAX - PORT_MIN ))
  # An empty range would make the modulo below divide by zero.
  if [ "$span" -le 0 ]; then
    echo "[FATAL] invalid port range ${PORT_MIN}-${PORT_MAX}" >&2
    return 1
  fi
  while [ "$tries" -lt 500 ]; do
    # $RANDOM is only 15 bits (0..32767). Used on its own it can never cover a
    # span wider than 32768, so the top of the configured range was unreachable.
    # Two draws glued together give 30 bits, enough for any port span.
    candidate=$(( PORT_MIN + ((RANDOM << 15) | RANDOM) % span ))
    # Skip ports already handed out in this run, then ports lsof reports as in
    # use. If lsof is not installed the probe fails and the port counts as free.
    if [[ " ${ASSIGNED_PORTS[*]} " != *" $candidate "* ]] \
      && ! lsof -i :"$candidate" >/dev/null 2>&1; then
      ASSIGNED_PORTS+=("$candidate")
      ALLOC_PORT="$candidate"
      return 0
    fi
    tries=$((tries + 1))
  done
  echo "[FATAL] no free port found in ${PORT_MIN}-${PORT_MAX} after 500 tries" >&2
  return 1
}
# ── Shard definitions (identical to the CI matrix) ────────────────────────
# Each variable is one shard's comma-separated list of suite prefixes; the
# lists are assembled piecewise with += so no line continuation is needed.
S1="code.api.v4_0_0"

S2="code.api.v6_0_0,code.api.v5_0_0,code.api.v3_0_0,code.api.v2_1_0"
S2+=",code.api.v2_2_0,code.api.v2_0_0,code.api.v1_4_0,code.api.v1_3_0"
S2+=",code.api.UKOpenBanking,code.atms,code.branches,code.products,code.crm"
S2+=",code.accountHolder,code.entitlement,code.bankaccountcreation,code.bankconnectors,code.container"

S3="code.api.v1_2_1,code.api.ResourceDocs1_4_0,code.api.util,code.api.berlin"
S3+=",code.management,code.metrics,code.model,code.views,code.usercustomerlinks"
S3+=",code.customer,code.errormessages"

# Shard 4 base (identical to CI); the catch-all extras are appended separately.
S4_BASE="code.api.v5_1_0,code.api.v3_1_0,code.api.http4sbridge,code.api.v7_0_0"
S4_BASE+=",code.api.Authentication,code.api.dauthTest,code.api.DirectLoginTest"
S4_BASE+=",code.api.gateWayloginTest,code.api.OBPRestHelperTest,code.util,code.connector"
# ── Shard 4 catch-all: discover every package not covered by shards 1–3 ───
# (identical to CI)
# build_s4: print shard 4's suite filter on stdout.
# The filter is S4_BASE followed by every test package found on disk (derived
# from the directories of *.scala files under the obp-api and obp-commons test
# trees) that no entry of S1/S2/S3/S4_BASE accounts for. A package counts as
# accounted for when it equals an entry, lives below an entry, or is a parent
# of an entry. Any extras found are also reported on stderr.
# Globals: S1, S2, S3, S4_BASE (read)
build_s4() {
  # Space-separated view of every prefix already owned by some shard.
  local claimed="$S1 ${S2//,/ } ${S3//,/ } ${S4_BASE//,/ }"
  local discovered
  discovered=$(find obp-api/src/test/scala obp-commons/src/test/scala \
    -name "*.scala" 2>/dev/null \
    | sed 's|.*/test/scala/||; s|/[^/]*\.scala$||; s|/|.|g' \
    | sort -u)
  local extras="" candidate owner is_covered
  for candidate in $discovered; do
    is_covered=false
    for owner in $claimed; do
      # Same package as the entry, or nested below it.
      case "$candidate" in
        "$owner" | "$owner".*) is_covered=true ;;
      esac
      # Parent package of the entry.
      case "$owner" in
        "$candidate".*) is_covered=true ;;
      esac
      if [ "$is_covered" = "true" ]; then
        break
      fi
    done
    if [ "$is_covered" = "false" ]; then
      extras="${extras},${candidate}"
    fi
  done
  if [ -n "$extras" ]; then
    echo " [Shard 4] Catch-all extras: $extras" >&2
  fi
  echo "${S4_BASE}${extras}"
}
# Resolve shard 4's final filter once, up front: build_s4 prints the base list
# plus any catch-all extras.
S4=$(build_s4)
# ── 6-shard definitions (split the original shards 3 and 4; no catch-all) ──
# Shards 1 and 2 are reused as-is; the lists below cover shards 3 to 6.
S3_6="code.api.v1_2_1"

S4_6="code.api.ResourceDocs1_4_0,code.api.util,code.api.berlin"
S4_6+=",code.management,code.metrics,code.model,code.views,code.usercustomerlinks"
S4_6+=",code.customer,code.errormessages"

S5_6="code.api.v5_1_0,code.api.v3_1_0,code.api.http4sbridge,code.api.v7_0_0"

S6_6="code.api.Authentication,code.api.dauthTest,code.api.DirectLoginTest"
S6_6+=",code.api.gateWayloginTest,code.api.OBPRestHelperTest,code.util,code.connector"
# run_shard: run one shard's suites with `mvn scalatest:test` and report the result.
# Arguments: $1 - shard number (used in the log file name and OBP_API_INSTANCE_ID)
#            $2 - comma-separated suite filter passed as -DwildcardSuites
#            $3 - tests.port       — TestServer       (OBP_TESTS_PORT)
#            $4 - http4s.test.port — Http4sTestServer (OBP_HTTP4S_TEST_PORT)
# Globals:   MVN_OPTS, TIMEOUT_BIN (read)
# Outputs:   Maven output goes to test-results/parallel/shard<N>.log; one start
#            line and one result line go to stdout.
# Returns:   0 on success, otherwise the non-zero status of the timed Maven run.
run_shard() {
  local n=$1
  local filter=$2
  local port=$3
  local http4s_port=$4
  local log="test-results/parallel/shard${n}.log"
  echo "[Shard $n] Starting... (tests.port=$port, http4s.test.port=$http4s_port)"
  # OBP_* env vars take priority over the props file (see APIUtil.getPropsValue:
  # property name . -> _, uppercased, prefixed with OBP_). This is the local
  # equivalent of CI's "Setup props" step: the local test.default.props lacks
  # mail.test.mode (CI has it); without it, flows like consent actually open an
  # SMTP socket -> 500 (CI green, local red). We inject OBP_MAIL_TEST_MODE
  # instead of editing props so we don't clobber the user's local DB settings.
  # OBP_TESTS_PORT + OBP_HTTP4S_TEST_PORT carry the two dynamically-allocated free
  # ports (both test servers bind a real socket; see the port-allocation block).
  # Tests only, no recompile (the compile already happened in the pre-compile step).
  # ${TIMEOUT_BIN} 1200: stop the run after 20 min so Pekko non-daemon threads
  # cannot keep the JVM (and this script) alive forever.
  MAVEN_OPTS="$MVN_OPTS" \
  OBP_TESTS_PORT="${port}" \
  OBP_HOSTNAME="http://localhost:${port}" \
  OBP_HTTP4S_TEST_PORT="${http4s_port}" \
  OBP_MAIL_TEST_MODE="true" \
  OBP_API_INSTANCE_ID="shard_${n}" \
  "$TIMEOUT_BIN" 1200 mvn scalatest:test -pl obp-api -DfailIfNoTests=false \
    "-DwildcardSuites=${filter}" \
    > "$log" 2>&1
  local rc=$?
  # timeout exits with 124 when it had to stop the command. That is only benign
  # when the tests had already finished successfully and just the JVM refused to
  # exit. Treating every 124 as success would also turn a genuinely hung run, or
  # a run with failed tests followed by a hung JVM, green — so require ScalaTest's
  # end-of-run "All tests passed" summary in the log before forgiving it.
  if [ "$rc" -eq 124 ]; then
    if grep -q "All tests passed" "$log" 2>/dev/null; then
      rc=0
    else
      echo "[Shard $n] ⏱ timed out after 20 min without an 'All tests passed' summary"
    fi
  fi
  if [ "$rc" -eq 0 ]; then
    echo "[Shard $n] ✅ BUILD SUCCESS"
  else
    echo "[Shard $n] ❌ BUILD FAILURE — see $log"
  fi
  return "$rc"
}
START=$(date +%s)
# Ownership-aware lock cleanup. An EXIT trap that removes OBC_LOCK unconditionally
# lets ANY exiting run (lint failure, pre-compile failure, normal completion)
# delete a lock that a different checkout currently holds, which breaks the
# mutex. Replace it here, before the first `exit`, with a trap that only removes
# the lock while this process owns it (OBC_LOCK_HELD=1).
OBC_LOCK_HELD=0
trap 'if [ "$OBC_LOCK_HELD" = "1" ]; then rm -rf "$OBC_LOCK"; fi' EXIT
# ── Lint (CI compile job's first step): test-isolation static check; abort on fail ──
echo "Lint: test-isolation check..."
if ! python3 .github/scripts/check_test_isolation.py; then
  echo "❌ Lint failed (setPropsValues at class/feature body). Fix before running." >&2
  exit 1
fi
echo ""
# ── Pre-compile (done once, so the 4 shards don't race over a shared target/) ──
# In CI the compile job runs `clean install` to install artifacts into ~/.m2 and
# uploads them; the test job downloads them and re-installs obp-commons / the
# parent POM into the new machine's ~/.m2 via install-file. A single local machine
# shares one ~/.m2, so we only install once — dropping upload/download/touch.
# Key point: each shard runs `scalatest:test -pl obp-api` (no -am), so obp-commons
# is resolved from ~/.m2, not from the reactor. We must install the CURRENT
# obp-commons into ~/.m2, otherwise shards test against a stale obp-commons (the
# old `test-compile -am` only built it in the reactor and never refreshed ~/.m2).
# The obp-commons install holds OBC_LOCK so concurrent checkouts don't race on
# the shared ~/.m2 write. The subsequent test-compile writes only to this
# checkout's own target/ and is safe to run in parallel across checkouts.
echo "Pre-compile 1/2: install obp-commons -> ~/.m2 ..."
# mkdir is atomic: exactly one process creates the directory; the others poll.
# Say once why we are blocked, so a stale lock left by a killed run is diagnosable.
LOCK_WAIT_NOTICE=0
until mkdir "$OBC_LOCK" 2>/dev/null; do
  if [ "$LOCK_WAIT_NOTICE" = "0" ]; then
    echo "  waiting for $OBC_LOCK (another checkout is installing obp-commons; remove the directory manually if no other run is active)" >&2
    LOCK_WAIT_NOTICE=1
  fi
  sleep 2
done
OBC_LOCK_HELD=1
MAVEN_OPTS="$MVN_OPTS" \
  mvn install -DskipTests -pl obp-commons -q > test-results/parallel/precompile.log 2>&1
PRECOMPILE_RC=$?
# Release as soon as the ~/.m2 write is done; from here on the trap is a no-op.
rm -rf "$OBC_LOCK"
OBC_LOCK_HELD=0
if [ "$PRECOMPILE_RC" -eq 0 ]; then
  echo "Pre-compile 2/2: test-compile obp-api -> shared target/ ..."
  MAVEN_OPTS="$MVN_OPTS" \
    mvn test-compile -pl obp-api -q >> test-results/parallel/precompile.log 2>&1
  PRECOMPILE_RC=$?
fi
if [ "$PRECOMPILE_RC" -ne 0 ]; then
  echo "❌ Pre-compile failed — see test-results/parallel/precompile.log" >&2
  tail -25 test-results/parallel/precompile.log >&2
  exit 1
fi
echo "Pre-compile done, starting shards..."
echo ""
# ── Launch the shards in parallel and collect their exit codes ────────────
# The 4- and 6-shard layouts differ only in their filter lists, so choose the
# list once and drive everything else from arrays instead of keeping two
# copy-pasted branches with numbered variables.
# Results: RCS (one exit code per shard, in shard order) and TOTAL_SHARDS.
if [ "$SHARDS" = "6" ]; then
  TOTAL_SHARDS=6
  SHARD_FILTERS=("$S1" "$S2" "$S3_6" "$S4_6" "$S5_6" "$S6_6")
else
  TOTAL_SHARDS=4
  SHARD_FILTERS=("$S1" "$S2" "$S3" "$S4")
fi
echo "Starting ${TOTAL_SHARDS} shards in parallel..."
echo ""
# Allocate two free ports per shard BEFORE forking. Sequential calls (not in a
# subshell) so ASSIGNED_PORTS dedup carries across allocations.
SHARD_TESTS_PORTS=()
SHARD_HTTP4S_PORTS=()
for (( i = 0; i < TOTAL_SHARDS; i++ )); do
  alloc_free_port || exit 1
  SHARD_TESTS_PORTS+=("$ALLOC_PORT")
  alloc_free_port || exit 1
  SHARD_HTTP4S_PORTS+=("$ALLOC_PORT")
done
# Fork one background run_shard per shard, remembering each PID in shard order.
SHARD_PIDS=()
for (( i = 0; i < TOTAL_SHARDS; i++ )); do
  run_shard "$(( i + 1 ))" "${SHARD_FILTERS[i]}" \
    "${SHARD_TESTS_PORTS[i]}" "${SHARD_HTTP4S_PORTS[i]}" &
  SHARD_PIDS+=("$!")
done
# Wait for every shard individually so each exit status is captured.
RCS=()
for pid in "${SHARD_PIDS[@]}"; do
  wait "$pid"
  RCS+=("$?")
done
# ── Wall-clock summary, per-shard Maven time and overall status ───────────
END=$(date +%s)
DURATION=$(( END - START ))
ELAPSED=$(( DURATION / 60 ))
SEC=$(( DURATION % 60 ))
echo
echo "══════════════════════════════════════"
echo "All ${TOTAL_SHARDS} shards done in ${ELAPSED}m ${SEC}s"
echo
for (( n = 1; n <= TOTAL_SHARDS; n++ )); do
  log="test-results/parallel/shard${n}.log"
  # Last "Total time:" line of the shard's log, reduced to the value after the label.
  total_time=$(sed -n 's/.*Total time: *//p' "$log" 2>/dev/null | tail -1)
  # CI parity ("RECOMPILATION CHECK"): after pre-compile, shards should not
  # recompile; if they do, the artifacts weren't reused — warn.
  note=""
  if grep -q "Compiling " "$log" 2>/dev/null; then
    note=" ⚠ recompilation detected (artifacts not reused)"
  fi
  echo " Shard $n: ${total_time}${note}"
done
# Any non-zero shard exit code makes the whole run a failure.
OVERALL_RC=0
for rc in "${RCS[@]}"; do
  if [ "$rc" -ne 0 ]; then
    OVERALL_RC=1
  fi
done
# ── CI parity ("Report failing tests" step): extract failures for failed shards ──
# For every shard with a non-zero exit code, print the first lines of its log
# that match the bridge/uncaught-exception markers (up to 20) and the
# "*** FAILED ***" scenario markers (up to 40), with their line numbers.
if [ "$OVERALL_RC" -ne 0 ]; then
  bridge_pattern="\[BRIDGE\] Exception\|Uncaught exception in dispatch\|requestScopeProxy="
  failed_pattern="\*\*\* FAILED \*\*\*"
  echo ""
  echo "── Failure diagnostics (CI-style report) ───────────"
  for (( idx = 0; idx < TOTAL_SHARDS; idx++ )); do
    # Shards that passed have nothing to report.
    if [ "${RCS[idx]}" -eq 0 ]; then
      continue
    fi
    n=$(( idx + 1 ))
    log="test-results/parallel/shard${n}.log"
    echo ""
    echo "### Shard $n ($log) ###"
    echo " -- bridge / uncaught exceptions --"
    grep -n "$bridge_pattern" "$log" 2>/dev/null | head -20 || true
    echo " -- failing scenarios (*** FAILED ***) --"
    grep -n "$failed_pattern" "$log" 2>/dev/null | head -40 || true
  done
fi
# ── Final verdict ─────────────────────────────────────────────────────────
echo
case "$OVERALL_RC" in
  0) echo "✅ ALL SHARDS PASSED" ;;
  *) echo "❌ SOME SHARDS FAILED — check test-results/parallel/shardN.log" ;;
esac
# ── CI parity (report job): http4s vs Lift per-test speed table; best-effort, ──
# does not affect the exit code. Only attempted when at least one XML report
# exists; a failing report script is downgraded to a "skipped" note.
REPORTS_DIR="obp-api/target/surefire-reports"
if ls "$REPORTS_DIR"/*.xml >/dev/null 2>&1; then
  echo
  echo "── Per-test speed (CI report-job equivalent) ───────"
  if ! python3 .github/scripts/test_speed_report.py "$REPORTS_DIR" 2>/dev/null; then
    echo " (speed report skipped)"
  fi
fi
# The script's exit status reflects the shards only.
exit "$OVERALL_RC"