-
Notifications
You must be signed in to change notification settings - Fork 623
258 lines (239 loc) · 10.5 KB
/
benchmarks.yml
File metadata and controls
258 lines (239 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
name: Benchmarks

# NOTE(review): `description` is not a valid top-level key in a *workflow*
# file (it is only valid in action metadata, action.yml). GitHub rejects
# workflows with unexpected top-level keys, so the text is kept as a comment:
# Runs minimal JMH benchmark

on:
  schedule:
    - cron: "55 15 * * *"
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request#"
        required: false
      threshold:
        description: "Regression threshold (Δ% on Time or Alloc/op)"
        required: false
        default: "10"
  issue_comment:
    types: [created]
env:
  # Branch holding the comparison tooling and used as the default checkout.
  CHC_BRANCH: "main"
  # ClickHouse server version the benchmarks run against.
  CH_VERSION: "25.3"
  JAVA_VERSION: 17
  # Default Δ% above which a regression / improvement is flagged and the
  # PR check is failed. Overridable per workflow_dispatch input or per
  # `/benchmark threshold=N` comment.
  DEFAULT_THRESHOLD_PCT: "10"
  # NOTE: there is intentionally no workflow-level `concurrency:` block.
  # `issue_comment` events fire for *every* comment on every PR / issue,
  # including those from bots (e.g. sonarqubecloud). A workflow-level
  # `cancel-in-progress` group keyed on the PR number would cancel an
  # in-flight legitimate `/benchmark` run as soon as any unrelated bot
  # commented on the same PR. The per-PR concurrency rule is enforced on
  # the `jmh` job instead, so unrelated comment events leave the job
  # skipped without claiming the concurrency slot.
jobs:
  jmh:
    # Fixed typo: "Mininal" -> "Minimal".
    name: "Minimal JMH Benchmarks"
    runs-on: "ubuntu-latest"
    timeout-minutes: 30
    permissions:
      contents: read
      pull-requests: write
      issues: write
      actions: read
    # Single fan-in filter, modelled on `.github/workflows/claude.yml`:
    # the job runs for the daily schedule, manual `workflow_dispatch`,
    # or a `/benchmark` slash-command from a non-bot repo collaborator
    # on a pull request. Bot comments and chat comments leave the job
    # skipped — no failed run, no notification, no concurrency
    # collision.
    if: |
      startsWith(github.repository, 'ClickHouse/') &&
      (
        github.event_name == 'schedule' ||
        github.event_name == 'workflow_dispatch' ||
        (
          github.event_name == 'issue_comment' &&
          github.event.issue.pull_request != null &&
          github.event.sender.type != 'Bot' &&
          github.event.comment.user.type != 'Bot' &&
          startsWith(github.event.comment.body, '/benchmark') &&
          contains(fromJSON('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)
        )
      )
    # One running benchmark per PR (and per-SHA for the daily
    # schedule). Concurrency lives on this job, not on the workflow,
    # so unrelated comment events (which the job-level `if` filters
    # out) never claim the slot or cancel an in-flight run.
    concurrency:
      group: ${{ github.workflow }}-jmh-${{ github.event.issue.number || github.event.inputs.pr || github.sha }}
      cancel-in-progress: true
    steps:
      # React with a rocket so the commenter gets immediate feedback;
      # best-effort (`|| true`) — a failed reaction must not fail the run.
      - name: Acknowledge /benchmark trigger
        if: github.event_name == 'issue_comment'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh api -X POST \
            "repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions" \
            -f content='rocket' || true
      # Normalize the three trigger types into two outputs:
      # `number` (PR number, empty for scheduled runs) and `threshold`.
      - name: Resolve PR number and threshold
        id: pr
        env:
          COMMENT_BODY: ${{ github.event.comment.body }}
          DISPATCH_PR: ${{ github.event.inputs.pr }}
          DISPATCH_THRESHOLD: ${{ github.event.inputs.threshold }}
          DEFAULT_THRESHOLD: ${{ env.DEFAULT_THRESHOLD_PCT }}
        run: |
          # Use the built-in $GITHUB_EVENT_NAME env var rather than inline
          # ${{ }} expansion inside the shell script (same value, but no
          # template substitution into executable code).
          case "$GITHUB_EVENT_NAME" in
            issue_comment)
              # Accept `/benchmark threshold=15` or `/benchmark threshold=7.5`.
              T=$(printf '%s' "$COMMENT_BODY" | grep -oE 'threshold=[0-9]+(\.[0-9]+)?' | head -1 | cut -d= -f2 || true)
              [ -z "$T" ] && T="$DEFAULT_THRESHOLD"
              echo "number=${{ github.event.issue.number }}" >> "$GITHUB_OUTPUT"
              echo "threshold=$T" >> "$GITHUB_OUTPUT"
              ;;
            workflow_dispatch)
              echo "number=$DISPATCH_PR" >> "$GITHUB_OUTPUT"
              echo "threshold=${DISPATCH_THRESHOLD:-$DEFAULT_THRESHOLD}" >> "$GITHUB_OUTPUT"
              ;;
            *)
              echo "number=" >> "$GITHUB_OUTPUT"
              echo "threshold=$DEFAULT_THRESHOLD" >> "$GITHUB_OUTPUT"
              ;;
          esac
      - name: Post "started" comment
        if: github.event_name == 'issue_comment' && steps.pr.outputs.number != ''
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh api -X POST \
            "repos/${{ github.repository }}/issues/${{ steps.pr.outputs.number }}/comments" \
            -f body="JMH benchmark run started: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" || true
      - name: Check out Git repository
        uses: actions/checkout@v4
        with:
          ref: ${{ env.CHC_BRANCH }}
      # The benchmark code runs against the PR's working tree (see the
      # PR checkout below), but the comparison tooling lives in this
      # workflow's contract on `main`. Stash a copy now so the compare
      # step still works even when the PR branch was forked before
      # `.github/scripts/compare-jmh.py` existed.
      - name: Stash comparison tooling from main
        run: |
          mkdir -p "$RUNNER_TEMP/jmh-tools"
          cp -v .github/scripts/compare-jmh.py "$RUNNER_TEMP/jmh-tools/"
      - name: Check out PR
        if: steps.pr.outputs.number != ''
        run: |
          git fetch --no-tags --prune --progress --no-recurse-submodules --depth=1 \
            origin pull/${{ steps.pr.outputs.number }}/merge:merged-pr && git checkout merged-pr
      - name: Install JDK and Maven
        uses: actions/setup-java@v4
        with:
          distribution: "temurin"
          java-version: ${{ env.JAVA_VERSION }}
          cache: "maven"
      - name: Build
        run: mvn --batch-mode --no-transfer-progress -Dj8 -DskipTests=true clean install
      - name: Prepare Dataset
        run: |
          cd ./performance &&
          mvn --batch-mode --no-transfer-progress clean compile exec:exec -Dexec.executable=java \
            -Dexec.args="-classpath %classpath com.clickhouse.benchmark.data.DataSetGenerator -input sample_dataset.sql -name default -rows 100000"
      - name: Run Benchmarks
        run: |
          cd ./performance &&
          mvn --batch-mode --no-transfer-progress clean compile exec:exec -Dexec.executable=java -Dexec.args="-classpath %classpath com.clickhouse.benchmark.BenchmarkRunner \
            -l 100000,10000 -m 3 -t 15 -b q,i -d file://default.csv"
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: success()
        with:
          name: result ${{ github.job }}
          path: |
            performance/jmh-results*
      # Compare against the latest scheduled run on `main` and post a
      # markdown comment. Only relevant when this run is tied to a PR;
      # scheduled / non-PR runs skip these steps. We never fail the
      # workflow if comparison fails — it's reporting, not gating.
      - name: Fetch baseline results (latest successful main schedule)
        id: baseline
        if: steps.pr.outputs.number != ''
        continue-on-error: true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          mkdir -p baseline-results
          RUN_ID=$(gh run list \
            --workflow benchmarks.yml \
            --branch main \
            --status success \
            --limit 20 \
            --repo "${{ github.repository }}" \
            --json databaseId,event \
            -q 'map(select(.event=="schedule"))[0].databaseId // empty')
          if [ -z "$RUN_ID" ]; then
            echo "No scheduled baseline run found on main"
            echo "found=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "Baseline run: $RUN_ID"
          if gh run download "$RUN_ID" --dir baseline-results --repo "${{ github.repository }}"; then
            echo "found=true" >> "$GITHUB_OUTPUT"
            echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
          else
            echo "Failed to download baseline artifacts"
            echo "found=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Compare benchmark results
        id: compare
        if: steps.pr.outputs.number != '' && steps.baseline.outputs.found == 'true'
        continue-on-error: true
        run: |
          python3 "$RUNNER_TEMP/jmh-tools/compare-jmh.py" \
            --baseline baseline-results \
            --current performance \
            --baseline-run-id "${{ steps.baseline.outputs.run_id }}" \
            --current-run-id "${{ github.run_id }}" \
            --repo "${{ github.repository }}" \
            --server-url "${{ github.server_url }}" \
            --threshold-pct "${{ steps.pr.outputs.threshold }}" \
            --output comparison.md \
            --summary-output compare-summary.env
          # Surface the script's summary file as step outputs so the
          # follow-up "enforce threshold" step can decide whether to
          # fail the job — without skipping the comment post.
          cat compare-summary.env >> "$GITHUB_OUTPUT"
          echo "ok=true" >> "$GITHUB_OUTPUT"
      - name: Post baseline-not-found comment
        if: |
          steps.pr.outputs.number != '' &&
          steps.baseline.outputs.found != 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh pr comment "${{ steps.pr.outputs.number }}" \
            --repo "${{ github.repository }}" \
            --body "JMH benchmark comparison skipped: no successful scheduled run on \`main\` was found to use as a baseline." || true
      - name: Post comparison comment
        if: steps.compare.outputs.ok == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh pr comment "${{ steps.pr.outputs.number }}" \
            --repo "${{ github.repository }}" \
            --body-file comparison.md
      # Fail the job — and therefore the PR check — when the comparison
      # script flagged at least one regression beyond the threshold.
      # This runs *after* the comment has been posted so reviewers still
      # see the full table on the PR.
      - name: Enforce regression threshold
        if: steps.compare.outputs.ok == 'true'
        env:
          # Pass step outputs through `env:` rather than interpolating
          # ${{ }} directly into the shell body — expansion into script
          # text is GitHub's documented script-injection vector (the
          # comment body is already handled this way above).
          REGRESSIONS: ${{ steps.compare.outputs.regressions }}
          THRESHOLD: ${{ steps.pr.outputs.threshold }}
        run: |
          if [ -n "$REGRESSIONS" ] && [ "$REGRESSIONS" -gt 0 ]; then
            echo "::error::$REGRESSIONS benchmark(s) regressed by more than ${THRESHOLD}% vs baseline."
            exit 1
          fi
          echo "No regressions over ${THRESHOLD}%."