Skip to content

Commit 4b06230

Browse files
tychtjan and claude committed
feat: add report clustering to group similar SDK changes
- Add cluster_sdk_reports.py for heuristic-based report clustering
- Group commits by: JIRA ticket, OpenAPI spec service, service+change type
- Optional Claude API enhancement for semantic similarity analysis
- Generate merged reports for batch processing by agents
- Reduces agent runs by grouping related changes together

Example: 9 reports -> 4 clusters = 5 fewer agent runs needed

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 3616dda commit 4b06230

3 files changed

Lines changed: 829 additions & 4 deletions

File tree

.github/workflows/sdk-diff-analyzer.yaml

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ on:
1919
since_commit:
2020
description: 'Analyze since this commit SHA (overrides state tag)'
2121
default: ''
22+
# Manual-run toggle for the report clustering step; declared as a boolean input.
enable_clustering:
  type: boolean
  default: 'true'
  description: 'Enable report clustering (groups similar changes)'
26+
# Manual-run toggle for the optional Claude-assisted clustering; off by default.
# The description states that it needs the ANTHROPIC_API_KEY secret.
use_claude_clustering:
  type: boolean
  default: 'false'
  description: 'Use Claude for enhanced clustering (requires ANTHROPIC_API_KEY)'
2230
debug:
2331
description: 'Enable verbose debug logging'
2432
default: 'true'
@@ -146,19 +154,66 @@ jobs:
146154
REPORT_COUNT=$(ls reports/*.md 2>/dev/null | grep -v "00-summary" | wc -l || echo "0")
147155
echo "sdk_reports=$REPORT_COUNT" >> $GITHUB_OUTPUT
148156
157+
# Groups the per-commit SDK reports in ./reports into clusters written to
# ./clustered, and exposes the number of cluster files as the step output
# `clusters_created`. Runs only when more than one report was produced.
- name: Cluster similar reports
  id: cluster
  # `enable_clustering` is a boolean input. Comparing a boolean with the string
  # 'false' coerces both sides to numbers (false -> 0, 'false' -> NaN), so the
  # previous `!= 'false'` test was always true and the toggle had no effect.
  # Test the boolean itself, and keep clustering on for non-manual triggers,
  # where the input is not set at all.
  if: >-
    steps.analyze.outputs.sdk_reports > 1 &&
    (github.event_name != 'workflow_dispatch' || inputs.enable_clustering)
  env:
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  run: |
    echo "=== Clustering Reports ==="
    echo "Reports to cluster: ${{ steps.analyze.outputs.sdk_reports }}"

    mkdir -p clustered

    # Build clustering arguments as an array so every flag stays a single word
    cluster_args=(--input-dir ./reports --output-dir ./clustered)

    # Use the Claude enhancement only when it is requested AND the key is present
    if [ "${{ inputs.use_claude_clustering }}" = "true" ]; then
      if [ -n "$ANTHROPIC_API_KEY" ]; then
        echo "Claude enhancement enabled"
        pip install anthropic --quiet
        cluster_args+=(--use-claude)
      else
        echo "Warning: Claude clustering requested but ANTHROPIC_API_KEY not set"
        echo "Falling back to heuristic clustering"
      fi
    fi

    echo "Running: python3 scripts/cluster_sdk_reports.py ${cluster_args[*]}"
    # NOTE(review): unless the shell runs with pipefail, this pipeline reports
    # the exit status of `tee`, not of the script - confirm that is intended.
    python3 scripts/cluster_sdk_reports.py "${cluster_args[@]}" 2>&1 | tee clustering.log

    # Count clusters. `find` prints nothing (and still succeeds) when no file
    # matches, so exactly one number is written regardless of shell options.
    cluster_count=$(find clustered -maxdepth 1 -type f -name 'cluster-*.md' | wc -l)
    echo "clusters_created=$cluster_count" >> "$GITHUB_OUTPUT"

    # Guard on the file that is actually printed
    if [ -f clustered/00-clusters.md ]; then
      echo ""
      echo "=== Cluster Summary ==="
      cat clustered/00-clusters.md
    fi
193+
149194
- name: Upload analysis reports
150195
uses: actions/upload-artifact@v4
151196
with:
152197
name: sdk-diff-reports-${{ github.run_number }}
153198
path: reports/
154199
retention-days: 30
155200

201+
# Publishes the clustered/ directory as a run artifact, but only when the
# clustering step reported at least one cluster file.
- name: Upload clustered reports
  uses: actions/upload-artifact@v4
  if: ${{ steps.cluster.outputs.clusters_created > 0 }}
  with:
    path: clustered/
    name: sdk-clustered-reports-${{ github.run_number }}
    retention-days: 30
208+
156209
- name: Upload analyzer log
157210
if: always()
158211
uses: actions/upload-artifact@v4
159212
with:
160213
name: analyzer-log-${{ github.run_number }}
161-
path: analyzer.log
214+
path: |
215+
analyzer.log
216+
clustering.log
162217
retention-days: 7
163218
if-no-files-found: ignore
164219

@@ -187,15 +242,16 @@ jobs:
187242
- name: Job summary
188243
if: always()
189244
run: |
190-
cat >> $GITHUB_STEP_SUMMARY << 'EOF'
245+
cat >> $GITHUB_STEP_SUMMARY << EOF
191246
## SDK Diff Analyzer Results
192247
193248
| Parameter | Value |
194249
|-----------|-------|
195250
| Mode | ${{ steps.range.outputs.mode }} |
196-
| Since | `${{ steps.range.outputs.since || 'N/A' }}` |
197-
| gdc-nas HEAD | `${{ steps.gdc_nas.outputs.head_sha }}` |
251+
| Since | \`${{ steps.range.outputs.since || 'N/A' }}\` |
252+
| gdc-nas HEAD | \`${{ steps.gdc_nas.outputs.head_sha }}\` |
198253
| SDK-relevant commits | ${{ steps.analyze.outputs.sdk_reports }} |
254+
| Clusters created | ${{ steps.cluster.outputs.clusters_created || '0' }} |
199255
200256
EOF
201257
@@ -204,3 +260,10 @@ jobs:
204260
echo "" >> $GITHUB_STEP_SUMMARY
205261
cat reports/00-summary.md >> $GITHUB_STEP_SUMMARY
206262
fi
263+
264+
if [ -f clustered/00-clusters.md ]; then
265+
echo "" >> $GITHUB_STEP_SUMMARY
266+
echo "### Clustering Summary" >> $GITHUB_STEP_SUMMARY
267+
echo "" >> $GITHUB_STEP_SUMMARY
268+
cat clustered/00-clusters.md >> $GITHUB_STEP_SUMMARY
269+
fi

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ target-version = "py310"
121121
# Standalone analyzer scripts - ignore performance suggestions
122122
"scripts/gdc_nas_diff_analyzer.py" = ["PERF401", "PIE810"]
123123
"scripts/gdc_nas_api_analyzer.py" = ["PERF401"]
124+
"scripts/cluster_sdk_reports.py" = ["PERF401"]
124125

125126
[tool.ruff.format]
126127
exclude = ['(gooddata-api-client|.*\.snapshot\..*)']

0 commit comments

Comments
 (0)