-
Notifications
You must be signed in to change notification settings - Fork 623
258 lines (239 loc) · 10.5 KB
/
benchmarks.yml
File metadata and controls
258 lines (239 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
name: Benchmarks

# NOTE(review): `description` is not a valid top-level key in a *workflow*
# file (it is only valid in action metadata, action.yml). GitHub rejects
# workflows with unexpected top-level keys, so the text is kept as a comment:
# Runs minimal JMH benchmark

on:
  schedule:
    - cron: "55 15 * * *"
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request#"
        required: false
      threshold:
        description: "Regression threshold (Δ% on Time or Alloc/op)"
        required: false
        default: "10"
  issue_comment:
    types: [created]
env:
  # Branch holding the comparison tooling and used as the default checkout.
  CHC_BRANCH: "main"
  # ClickHouse server version the benchmarks run against.
  CH_VERSION: "25.3"
  JAVA_VERSION: 17
  # Default Δ% above which a regression / improvement is flagged and the
  # PR check is failed. Overridable per workflow_dispatch input or per
  # `/benchmark threshold=N` comment.
  DEFAULT_THRESHOLD_PCT: "10"
  # NOTE: there is intentionally no workflow-level `concurrency:` block.
  # `issue_comment` events fire for *every* comment on every PR / issue,
  # including those from bots (e.g. sonarqubecloud). A workflow-level
  # `cancel-in-progress` group keyed on the PR number would cancel an
  # in-flight legitimate `/benchmark` run as soon as any unrelated bot
  # commented on the same PR. The per-PR concurrency rule is enforced on
  # the `jmh` job instead, so unrelated comment events leave the job
  # skipped without claiming the concurrency slot.
jobs:
  jmh:
    # Fixed typo: "Mininal" -> "Minimal".
    name: "Minimal JMH Benchmarks"
    runs-on: "ubuntu-latest"
    timeout-minutes: 30
    permissions:
      contents: read
      pull-requests: write
      issues: write
      actions: read
    # Single fan-in filter, modelled on `.github/workflows/claude.yml`:
    # the job runs for the daily schedule, manual `workflow_dispatch`,
    # or a `/benchmark` slash-command from a non-bot repo collaborator
    # on a pull request. Bot comments and chat comments leave the job
    # skipped — no failed run, no notification, no concurrency
    # collision.
    if: |
      startsWith(github.repository, 'ClickHouse/') &&
      (
        github.event_name == 'schedule' ||
        github.event_name == 'workflow_dispatch' ||
        (
          github.event_name == 'issue_comment' &&
          github.event.issue.pull_request != null &&
          github.event.sender.type != 'Bot' &&
          github.event.comment.user.type != 'Bot' &&
          startsWith(github.event.comment.body, '/benchmark') &&
          contains(fromJSON('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)
        )
      )
    # One running benchmark per PR (and per-SHA for the daily
    # schedule). Concurrency lives on this job, not on the workflow,
    # so unrelated comment events (which the job-level `if` filters
    # out) never claim the slot or cancel an in-flight run.
    concurrency:
      group: ${{ github.workflow }}-jmh-${{ github.event.issue.number || github.event.inputs.pr || github.sha }}
      cancel-in-progress: true
    steps:
      # React with a rocket so the commenter gets immediate feedback;
      # best-effort (`|| true`) — a failed reaction must not fail the run.
      - name: Acknowledge /benchmark trigger
        if: github.event_name == 'issue_comment'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh api -X POST \
            "repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions" \
            -f content='rocket' || true
      # Normalize the three trigger types into two outputs:
      # `number` (PR number, empty for scheduled runs) and `threshold`.
      - name: Resolve PR number and threshold
        id: pr
        env:
          COMMENT_BODY: ${{ github.event.comment.body }}
          DISPATCH_PR: ${{ github.event.inputs.pr }}
          DISPATCH_THRESHOLD: ${{ github.event.inputs.threshold }}
          DEFAULT_THRESHOLD: ${{ env.DEFAULT_THRESHOLD_PCT }}
        run: |
          # Use the built-in $GITHUB_EVENT_NAME env var rather than inline
          # ${{ }} expansion inside the shell script (same value, but no
          # template substitution into executable code).
          case "$GITHUB_EVENT_NAME" in
            issue_comment)
              # Accept `/benchmark threshold=15` or `/benchmark threshold=7.5`.
              T=$(printf '%s' "$COMMENT_BODY" | grep -oE 'threshold=[0-9]+(\.[0-9]+)?' | head -1 | cut -d= -f2 || true)
              [ -z "$T" ] && T="$DEFAULT_THRESHOLD"
              echo "number=${{ github.event.issue.number }}" >> "$GITHUB_OUTPUT"
              echo "threshold=$T" >> "$GITHUB_OUTPUT"
              ;;
            workflow_dispatch)
              echo "number=$DISPATCH_PR" >> "$GITHUB_OUTPUT"
              echo "threshold=${DISPATCH_THRESHOLD:-$DEFAULT_THRESHOLD}" >> "$GITHUB_OUTPUT"
              ;;
            *)
              echo "number=" >> "$GITHUB_OUTPUT"
              echo "threshold=$DEFAULT_THRESHOLD" >> "$GITHUB_OUTPUT"
              ;;
          esac
      - name: Post "started" comment
        if: github.event_name == 'issue_comment' && steps.pr.outputs.number != ''
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh api -X POST \
            "repos/${{ github.repository }}/issues/${{ steps.pr.outputs.number }}/comments" \
            -f body="JMH benchmark run started: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" || true
      - name: Check out Git repository
        uses: actions/checkout@v4
        with:
          ref: ${{ env.CHC_BRANCH }}
      # The benchmark code runs against the PR's working tree (see the
      # PR checkout below), but the comparison tooling lives in this
      # workflow's contract on `main`. Stash a copy now so the compare
      # step still works even when the PR branch was forked before
      # `.github/scripts/compare-jmh.py` existed.
      - name: Stash comparison tooling from main
        run: |
          mkdir -p "$RUNNER_TEMP/jmh-tools"
          cp -v .github/scripts/compare-jmh.py "$RUNNER_TEMP/jmh-tools/"
      - name: Check out PR
        if: steps.pr.outputs.number != ''
        run: |
          git fetch --no-tags --prune --progress --no-recurse-submodules --depth=1 \
            origin pull/${{ steps.pr.outputs.number }}/merge:merged-pr && git checkout merged-pr
      - name: Install JDK and Maven
        uses: actions/setup-java@v4
        with:
          distribution: "temurin"
          java-version: ${{ env.JAVA_VERSION }}
          cache: "maven"
      - name: Build
        run: mvn --batch-mode --no-transfer-progress -Dj8 -DskipTests=true clean install
      - name: Prepare Dataset
        run: |
          cd ./performance &&
          mvn --batch-mode --no-transfer-progress clean compile exec:exec -Dexec.executable=java \
            -Dexec.args="-classpath %classpath com.clickhouse.benchmark.data.DataSetGenerator -input sample_dataset.sql -name default -rows 100000"
      - name: Run Benchmarks
        run: |
          cd ./performance &&
          mvn --batch-mode --no-transfer-progress clean compile exec:exec -Dexec.executable=java -Dexec.args="-classpath %classpath com.clickhouse.benchmark.BenchmarkRunner \
            -l 100000,10000 -m 3 -t 15 -b q,i -d file://default.csv"
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: success()
        with:
          name: result ${{ github.job }}
          path: |
            performance/jmh-results*
      # Compare against the latest scheduled run on `main` and post a
      # markdown comment. Only relevant when this run is tied to a PR;
      # scheduled / non-PR runs skip these steps. We never fail the
      # workflow if comparison fails — it's reporting, not gating.
      - name: Fetch baseline results (latest successful main schedule)
        id: baseline
        if: steps.pr.outputs.number != ''
        continue-on-error: true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          mkdir -p baseline-results
          RUN_ID=$(gh run list \
            --workflow benchmarks.yml \
            --branch main \
            --status success \
            --limit 20 \
            --repo "${{ github.repository }}" \
            --json databaseId,event \
            -q 'map(select(.event=="schedule"))[0].databaseId // empty')
          if [ -z "$RUN_ID" ]; then
            echo "No scheduled baseline run found on main"
            echo "found=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "Baseline run: $RUN_ID"
          if gh run download "$RUN_ID" --dir baseline-results --repo "${{ github.repository }}"; then
            echo "found=true" >> "$GITHUB_OUTPUT"
            echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
          else
            echo "Failed to download baseline artifacts"
            echo "found=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Compare benchmark results
        id: compare
        if: steps.pr.outputs.number != '' && steps.baseline.outputs.found == 'true'
        continue-on-error: true
        run: |
          python3 "$RUNNER_TEMP/jmh-tools/compare-jmh.py" \
            --baseline baseline-results \
            --current performance \
            --baseline-run-id "${{ steps.baseline.outputs.run_id }}" \
            --current-run-id "${{ github.run_id }}" \
            --repo "${{ github.repository }}" \
            --server-url "${{ github.server_url }}" \
            --threshold-pct "${{ steps.pr.outputs.threshold }}" \
            --output comparison.md \
            --summary-output compare-summary.env
          # Surface the script's summary file as step outputs so the
          # follow-up "enforce threshold" step can decide whether to
          # fail the job — without skipping the comment post.
          cat compare-summary.env >> "$GITHUB_OUTPUT"
          echo "ok=true" >> "$GITHUB_OUTPUT"
      - name: Post baseline-not-found comment
        if: |
          steps.pr.outputs.number != '' &&
          steps.baseline.outputs.found != 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh pr comment "${{ steps.pr.outputs.number }}" \
            --repo "${{ github.repository }}" \
            --body "JMH benchmark comparison skipped: no successful scheduled run on \`main\` was found to use as a baseline." || true
      - name: Post comparison comment
        if: steps.compare.outputs.ok == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh pr comment "${{ steps.pr.outputs.number }}" \
            --repo "${{ github.repository }}" \
            --body-file comparison.md
      # Fail the job — and therefore the PR check — when the comparison
      # script flagged at least one regression beyond the threshold.
      # This runs *after* the comment has been posted so reviewers still
      # see the full table on the PR.
      - name: Enforce regression threshold
        if: steps.compare.outputs.ok == 'true'
        env:
          # Pass step outputs through `env:` rather than interpolating
          # ${{ }} directly into the shell body — expansion into script
          # text is GitHub's documented script-injection vector (the
          # comment body is already handled this way above).
          REGRESSIONS: ${{ steps.compare.outputs.regressions }}
          THRESHOLD: ${{ steps.pr.outputs.threshold }}
        run: |
          if [ -n "$REGRESSIONS" ] && [ "$REGRESSIONS" -gt 0 ]; then
            echo "::error::$REGRESSIONS benchmark(s) regressed by more than ${THRESHOLD}% vs baseline."
            exit 1
          fi
          echo "No regressions over ${THRESHOLD}%."