Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -497,3 +497,139 @@ tasks:
echo "🎉 All Prometheus rule tests passed."
fi
silent: false

  # End-to-end performance scenario: renders and launches an in-cluster runner
  # Job, waits for completion, then downloads the results locally.
  perf:run:
    desc: Run Milo end-to-end performance scenario and download results
    silent: true
    cmds:
      - |
        set -euo pipefail
        # Parse CLI key=value overrides passed after -- and export as env
        for kv in {{.CLI_ARGS}}; do
          case "$kv" in
            *=*) key="${kv%%=*}"; val="${kv#*=}"; export "$key=$val" ;;
            *) : ;; # ignore non key=value tokens
          esac
        done
        # Defaults — each value can be overridden via environment variables or
        # `task perf:run -- KEY=value` arguments parsed above.
        NS="${NS:-milo-system}"
        MILO_NS="${MILO_NAMESPACE:-milo-system}"
        VM_NS="${VM_NAMESPACE:-telemetry-system}"
        VM_SVC_NAME="${VM_SERVICE_NAME:-vmsingle-telemetry-system-vm-victoria-metrics-k8s-stack}"
        VM_PORT="${VM_PORT:-8428}"
        # Default VictoriaMetrics endpoint is the in-cluster service FQDN.
        VM_BASE_URL="${VM_BASE_URL:-http://${VM_SVC_NAME}.${VM_NS}.svc.cluster.local:${VM_PORT}}"
        APISERVER_REGEX="${APISERVER_POD_REGEX:-milo-apiserver.*}"
        ETCD_REGEX="${ETCD_POD_REGEX:-etcd.*}"
        MILO_KUBECONFIG_SECRET_NAME="${MILO_KUBECONFIG_SECRET_NAME:-milo-controller-manager-kubeconfig}"
        MILO_KUBECONFIG_SECRET_KEY="${MILO_KUBECONFIG_SECRET_KEY:-kubeconfig}"
        # NOTE(review): MILO_KUBECONFIG_PATH and OUT_DIR are assigned but not
        # referenced later in this script (the pod copy below hard-codes
        # /work/out) — confirm whether they are still needed.
        MILO_KUBECONFIG_PATH="${MILO_KUBECONFIG_PATH:-/work/milo-kubeconfig}"
        NUM_PROJECTS="${NUM_PROJECTS:-{{default "100" .NUM_PROJECTS}}}"
        NUM_SECRETS_PER_PROJECT="${NUM_SECRETS_PER_PROJECT:-{{default "100" .NUM_SECRETS_PER_PROJECT}}}"
        NUM_CONFIGMAPS_PER_PROJECT="${NUM_CONFIGMAPS_PER_PROJECT:-{{default "100" .NUM_CONFIGMAPS_PER_PROJECT}}}"
        PROJECT_CONCURRENCY="${PROJECT_CONCURRENCY:-{{default "4" .PROJECT_CONCURRENCY}}}"
        OBJECT_CONCURRENCY="${OBJECT_CONCURRENCY:-{{default "8" .OBJECT_CONCURRENCY}}}"
        RUN_OBJECTS_PHASE="${RUN_OBJECTS_PHASE:-{{default "true" .RUN_OBJECTS_PHASE}}}"
        OUT_DIR="${OUT_DIR:-{{default "/work/out" .OUT_DIR}}}"
        STABILIZE_SECONDS="${STABILIZE_SECONDS:-{{default "90" .STABILIZE_SECONDS}}}"
        MEASURE_WINDOW="${MEASURE_WINDOW:-{{default "2m" .MEASURE_WINDOW}}}"
        ORG_NAME="${ORG_NAME:-{{default "" .ORG_NAME}}}"

        echo "🔎 Checking Milo kubeconfig …"
        if [ ! -f ".milo/kubeconfig" ]; then
          echo "Error: .milo/kubeconfig not found. Run 'task dev:setup' first." >&2
          exit 1
        fi

        echo "🔐 Ensuring perf-runner RBAC is applied …"
        sed "s/NAMESPACE_PLACEHOLDER/${NS}/g" test/performance/config/perf-runner-rbac.yaml | task test-infra:kubectl -- apply -f -

        echo "🗂 Publishing perf script as ConfigMap …"
        # --dry-run=client + apply keeps the ConfigMap update idempotent across runs.
        task test-infra:kubectl -- -n ${NS} create configmap perf-script \
          --from-file=perf_run.py=test/performance/scripts/perf_run.py \
          --dry-run=client -o yaml | task test-infra:kubectl -- apply -f -

        echo "🚀 Launching perf runner Job …"
        # Render the Job template by substituting *_PLACEHOLDER tokens.
        # VM_BASE_URL uses '#' as the sed delimiter because the URL contains '/'.
        # NOTE(review): overrides containing '/' (e.g. a pod regex with a slash)
        # would break the '/'-delimited expressions — confirm acceptable.
        sed \
          -e "s/MILO_NAMESPACE_PLACEHOLDER/${MILO_NS}/g" \
          -e "s/NAMESPACE_PLACEHOLDER/${NS}/g" \
          -e "s#VM_BASE_URL_PLACEHOLDER#${VM_BASE_URL}#g" \
          -e "s/APISERVER_REGEX_PLACEHOLDER/${APISERVER_REGEX}/g" \
          -e "s/ETCD_REGEX_PLACEHOLDER/${ETCD_REGEX}/g" \
          -e "s/NUM_PROJECTS_PLACEHOLDER/${NUM_PROJECTS}/g" \
          -e "s/NUM_SECRETS_PLACEHOLDER/${NUM_SECRETS_PER_PROJECT}/g" \
          -e "s/NUM_CONFIGMAPS_PLACEHOLDER/${NUM_CONFIGMAPS_PER_PROJECT}/g" \
          -e "s/STABILIZE_SECONDS_PLACEHOLDER/${STABILIZE_SECONDS}/g" \
          -e "s/MEASURE_WINDOW_PLACEHOLDER/${MEASURE_WINDOW}/g" \
          -e "s/ORG_NAME_PLACEHOLDER/${ORG_NAME}/g" \
          -e "s/PROJECT_CONCURRENCY_PLACEHOLDER/${PROJECT_CONCURRENCY}/g" \
          -e "s/OBJECT_CONCURRENCY_PLACEHOLDER/${OBJECT_CONCURRENCY}/g" \
          -e "s/RUN_OBJECTS_PHASE_PLACEHOLDER/${RUN_OBJECTS_PHASE}/g" \
          -e "s/MILO_KUBECONFIG_SECRET_PLACEHOLDER/${MILO_KUBECONFIG_SECRET_NAME}/g" \
          -e "s/MILO_KUBECONFIG_KEY_PLACEHOLDER/${MILO_KUBECONFIG_SECRET_KEY}/g" \
          test/performance/config/perf-runner-job.yaml | task test-infra:kubectl -- apply -f -

        echo "⏳ Waiting for Job completion …"
        task test-infra:kubectl -- -n ${NS} wait --for=condition=Complete job/perf-runner --timeout=45m

        echo "⬇️ Downloading results …"
        mkdir -p reports/perf
        # Prefer ConfigMap (works even if pod already terminated)
        TEST_ID=$(task test-infra:kubectl -- -n ${NS} get cm perf-results -o jsonpath='{.data.test_id}' 2>/dev/null || true)
        OUT_DIR_LOCAL="reports/perf/${TEST_ID:-latest}"
        mkdir -p "$OUT_DIR_LOCAL"
        task test-infra:kubectl -- -n ${NS} get cm perf-results -o jsonpath='{.data.results\.json}' > "$OUT_DIR_LOCAL/results.json" || true
        task test-infra:kubectl -- -n ${NS} get cm perf-results -o jsonpath='{.data.report\.html}' > "$OUT_DIR_LOCAL/report.html" || true
        # Fallback to copying from the pod if ConfigMap wasn't available
        if [ ! -s "$OUT_DIR_LOCAL/results.json" ] || [ ! -s "$OUT_DIR_LOCAL/report.html" ]; then
          POD=$(task test-infra:kubectl -- -n ${NS} get pods -l job-name=perf-runner -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
          if [ -n "$POD" ]; then
            task test-infra:kubectl -- -n ${NS} cp "$POD:/work/out/results.json" "$OUT_DIR_LOCAL/results.json" || true
            task test-infra:kubectl -- -n ${NS} cp "$POD:/work/out/report.html" "$OUT_DIR_LOCAL/report.html" || true
          fi
        fi
        echo "✅ Results saved to $OUT_DIR_LOCAL"

  # Tears down resources created by the last perf:run by launching an
  # in-cluster cleanup Job, then removes the runner's own artifacts.
  # Downloaded local results under reports/perf/ are intentionally kept.
  perf:cleanup:
    desc: Cleanup resources created by the last perf run (org/projects/secrets/configmaps)
    silent: true
    cmds:
      - |
        set -euo pipefail
        NS="${NS:-milo-system}"
        MILO_KUBECONFIG_SECRET_NAME="${MILO_KUBECONFIG_SECRET_NAME:-milo-controller-manager-kubeconfig}"
        MILO_KUBECONFIG_SECRET_KEY="${MILO_KUBECONFIG_SECRET_KEY:-kubeconfig}"
        # NOTE(review): MILO_KUBECONFIG_PATH is assigned but not referenced
        # below — confirm whether it is still needed.
        MILO_KUBECONFIG_PATH="${MILO_KUBECONFIG_PATH:-/work/milo-kubeconfig}"

        if [ ! -f ".milo/kubeconfig" ]; then
          echo "Error: .milo/kubeconfig not found. Run 'task dev:setup' first." >&2
          exit 1
        fi

        echo "🔎 Discovering last test identifiers …"
        # Allow override from CLI envs if ConfigMap isn't present
        TEST_ID_CM=$(task test-infra:kubectl -- -n ${NS} get cm perf-results -o jsonpath='{.data.test_id}' 2>/dev/null || true)
        ORG_NAME_CM=$(task test-infra:kubectl -- -n ${NS} get cm perf-results -o jsonpath='{.data.org_name}' 2>/dev/null || true)
        TEST_ID="${TEST_ID:-$TEST_ID_CM}"
        ORG_NAME="${ORG_NAME:-$ORG_NAME_CM}"
        # Nothing to clean up without both identifiers; exit successfully.
        if [ -z "${TEST_ID}" ] || [ -z "${ORG_NAME}" ]; then
          echo "No existing results found in namespace ${NS} (ConfigMap perf-results). Nothing to cleanup."
          exit 0
        fi

        echo "🚮 Launching cleanup Job for test ${TEST_ID} …"
        # Render the cleanup Job template by substituting *_PLACEHOLDER tokens.
        sed \
          -e "s/NAMESPACE_PLACEHOLDER/${NS}/g" \
          -e "s/TEST_ID_PLACEHOLDER/${TEST_ID}/g" \
          -e "s/ORG_NAME_PLACEHOLDER/${ORG_NAME}/g" \
          -e "s/MILO_KUBECONFIG_SECRET_PLACEHOLDER/${MILO_KUBECONFIG_SECRET_NAME}/g" \
          -e "s/MILO_KUBECONFIG_KEY_PLACEHOLDER/${MILO_KUBECONFIG_SECRET_KEY}/g" \
          test/performance/config/perf-cleanup-job.yaml | task test-infra:kubectl -- apply -f -

        echo "⏳ Waiting for cleanup Job completion …"
        task test-infra:kubectl -- -n ${NS} wait --for=condition=Complete job/perf-cleanup --timeout=30m

        echo "🧹 Removing runner artifacts (keeping downloaded results) …"
        task test-infra:kubectl -- -n ${NS} delete job/perf-runner --ignore-not-found
        task test-infra:kubectl -- -n ${NS} delete job/perf-cleanup --ignore-not-found
        task test-infra:kubectl -- -n ${NS} delete configmap perf-script --ignore-not-found
        task test-infra:kubectl -- -n ${NS} delete configmap perf-results --ignore-not-found
        echo "✅ Cleanup complete."
2 changes: 1 addition & 1 deletion config/apiserver/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ spec:
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
memory: 2G
startupProbe:
failureThreshold: 3
httpGet:
Expand Down
4 changes: 2 additions & 2 deletions config/dependencies/etcd/helmrelease.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ spec:
resources:
limits:
cpu: 500m
memory: 512Mi
memory: 2G
requests:
cpu: 200m
memory: 256Mi
Expand All @@ -57,7 +57,7 @@ spec:
metrics:
enabled: true
serviceMonitor:
enabled: false
enabled: true

# Logging configuration
extraEnvVars:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ metadata:
type: Opaque
stringData:
tokens.csv: |
test-admin-token,admin,1001,"system:masters"
test-user-token,test-user,1002,"system:authenticated"
test-admin-token,admin,admin,"system:masters"
test-user-token,test-user,test-user,"system:authenticated"
121 changes: 121 additions & 0 deletions test/performance/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
### Milo performance runner

This performance suite provisions service monitors for Milo and etcd, then takes CPU/memory usage snapshots from VictoriaMetrics before and after each load phase.

Files and structure:
- test/performance/scripts/perf_run.py: runner script executed inside a Kubernetes Job
- test/performance/config/perf-runner-job.yaml: Job template for the run phase
- test/performance/config/perf-cleanup-job.yaml: Job template for cleanup
- test/performance/config/perf-runner-rbac.yaml: ServiceAccount/Role/RoleBinding used by the jobs

#### Summary

- Creates a Milo `Organization`, then N `Projects`, waits for all to be Ready, and times it.
- Takes metrics snapshots before (baseline), after projects are ready, and optionally after per-project object creation.
- Optionally creates M `Secrets` and K `ConfigMaps` in each Project (parallelized), then measures again.
- Saves results to a ConfigMap and downloads a local HTML report and JSON.

#### Prerequisites

1) Bring up dev stack and observability:

```bash
task dev:setup && task dev:install-observability
```

2) Ensure a Milo kubeconfig secret exists in your cluster. By default the tasks mount `Secret/milo-controller-manager-kubeconfig` (key `kubeconfig`). You can override via env (see knobs below).

#### How to run

- Full run (org + projects + objects) with defaults:

```bash
task perf:run
```

- Projects-only (skip secrets/configmaps) and higher parallelism:

```bash
task perf:run -- RUN_OBJECTS_PHASE=false PROJECT_CONCURRENCY=10
```

- Cleanup all resources from the last run:

```bash
task perf:cleanup
```

#### Outputs

- In-cluster: ConfigMap `perf-results` in `NS` (default `milo-system`) with keys `results.json`, `report.html`, `test_id`, `org_name`.
- Local: `reports/perf/<test_id>/results.json` and `report.html` downloaded by the task after the Job completes. The HTML report includes grouped bar charts (CPU cores and Memory MB) and per-project delta KPIs for apiserver and etcd.

#### What the runner does

1) Baseline: query VictoriaMetrics for Milo apiserver and etcd CPU/memory.
2) Create Organization (no wait), then create N Projects, wait for all Projects Ready; record duration.
3) Stabilize, then snapshot “after projects”.
4) If enabled, create per-Project objects (Secrets/ConfigMaps) concurrently; stabilize, then snapshot “after secrets+configmaps”.

Snapshots come from VictoriaMetrics using `container_cpu_usage_seconds_total` (rate) and `container_memory_working_set_bytes` (avg_over_time) for pods matching the configured namespace and pod name regexes.

#### Configuration knobs (env vars)

Pass on the `task perf:run -- KEY=value ...` command line. Defaults shown in parentheses.

- Resource selection
- `NS` (milo-system): Namespace to run Job and store results ConfigMap
- `MILO_NAMESPACE` (milo-system): Namespace to measure apiserver/etcd pods
- `APISERVER_POD_REGEX` (milo-apiserver.*): Regex for apiserver pods
- `ETCD_POD_REGEX` (etcd.*): Regex for etcd pods

- Metrics source (VictoriaMetrics)
- `VM_NAMESPACE` (telemetry-system)
- `VM_SERVICE_NAME` (vmsingle-telemetry-system-vm-victoria-metrics-k8s-stack)
- `VM_PORT` (8428)
- `VM_BASE_URL` (optional override, e.g. http://hostname:8428). Default uses in-cluster FQDN: `http://<service>.<namespace>.svc.cluster.local:8428`.
- `MEASURE_WINDOW` (2m): Range window for rate/avg_over_time

- Scale and workload
- `NUM_PROJECTS` (100)
- `RUN_OBJECTS_PHASE` (true): Toggle per-project Secrets/ConfigMaps phase
- `NUM_SECRETS_PER_PROJECT` (100)
- `NUM_CONFIGMAPS_PER_PROJECT` (100)
- `PROJECT_CONCURRENCY` (4): Number of projects processed in parallel when creating objects
- `OBJECT_CONCURRENCY` (8): Secrets/ConfigMaps parallelism inside each project

- Stabilization windows
- `STABILIZE_SECONDS` (90): Sleep before snapshots after Projects and after Objects

- Identity / scoping
- `ORG_NAME` (auto-generated): Name of Organization to create
- `MILO_KUBECONFIG_SECRET_NAME` (milo-controller-manager-kubeconfig): Secret containing Milo kubeconfig
- `MILO_KUBECONFIG_SECRET_KEY` (kubeconfig): Secret key with kubeconfig content
- `MILO_KUBECONFIG_PATH` (/work/milo-kubeconfig): In-container path to mount kubeconfig
- `AUTH_BEARER_TOKEN` (optional): Override token injected into kubeconfig user for troubleshooting

#### Examples

- Measure project-only impact:

```bash
task perf:run -- RUN_OBJECTS_PHASE=false STABILIZE_SECONDS=60 NUM_PROJECTS=200
```

- Heavier objects phase, more parallelism:

```bash
task perf:run -- NUM_SECRETS_PER_PROJECT=500 NUM_CONFIGMAPS_PER_PROJECT=500 PROJECT_CONCURRENCY=12 OBJECT_CONCURRENCY=24
```

- Point to a custom VictoriaMetrics endpoint:

```bash
task perf:run -- VM_BASE_URL=http://vm.my-domain.local:8428
```

- Use a specific Organization name:

```bash
task perf:run -- ORG_NAME=perf-cow
```
52 changes: 52 additions & 0 deletions test/performance/config/perf-cleanup-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Cleanup Job template. The *_PLACEHOLDER tokens are substituted via sed by
# the `perf:cleanup` Taskfile task before the manifest is applied.
apiVersion: batch/v1
kind: Job
metadata:
  name: perf-cleanup
  namespace: NAMESPACE_PLACEHOLDER
spec:
  ttlSecondsAfterFinished: 300  # auto-delete the finished Job after 5 minutes
  backoffLimit: 0               # do not retry a failed cleanup run
  template:
    spec:
      serviceAccountName: perf-runner
      restartPolicy: Never
      containers:
        - name: cleanup
          image: python:3.11
          imagePullPolicy: IfNotPresent
          env:
            - name: TARGET_NAMESPACE
              value: NAMESPACE_PLACEHOLDER
            # NOTE(review): RUN_MODE=cleanup presumably selects the cleanup
            # path inside perf_run.py — verify against the script.
            - name: RUN_MODE
              value: cleanup
            - name: TEST_ID
              value: "TEST_ID_PLACEHOLDER"
            - name: ORG_NAME
              value: "ORG_NAME_PLACEHOLDER"
            - name: MILO_KUBECONFIG_PATH
              value: "/work/milo-kubeconfig"
          volumeMounts:
            # Mount only the script file itself (subPath) so /work stays writable.
            - name: script
              mountPath: /work/perf_run.py
              subPath: perf_run.py
              readOnly: true
            - name: milo-kubeconfig
              mountPath: /work/milo-kubeconfig
              subPath: MILO_KUBECONFIG_KEY_PLACEHOLDER
              readOnly: true
          command: ["bash","-lc"]
          # Install runtime dependencies at container start, then run the
          # shared perf script unbuffered so logs stream promptly.
          args:
            - >-
              python -m pip install --no-cache-dir kubernetes requests pyyaml &&
              python -u /work/perf_run.py
      volumes:
        - name: script
          configMap:
            name: perf-script
            # NOTE(review): leading-zero octals rely on YAML 1.1 parsing;
            # consider the unambiguous 0o444 / 0o400 forms — confirm the
            # cluster's parser before changing.
            defaultMode: 0444
        - name: milo-kubeconfig
          secret:
            secretName: MILO_KUBECONFIG_SECRET_PLACEHOLDER
            defaultMode: 0400


Loading