# Optional starting commit for the analysis; the default is an empty string.
since_commit:
  description: "Analyze since this commit SHA (overrides state tag)"
  default: ""
# Toggle for the report-clustering step. Declared as a boolean, so the
# default is written as a real boolean rather than a quoted string.
enable_clustering:
  description: 'Enable report clustering (groups similar changes)'
  type: boolean
  default: true
# Opt-in switch for Claude-assisted clustering. Declared as a boolean, so the
# default is written as a real boolean rather than a quoted string.
use_claude_clustering:
  description: 'Use Claude for enhanced clustering (requires ANTHROPIC_API_KEY)'
  type: boolean
  default: false
2230 debug :
2331 description : ' Enable verbose debug logging'
2432 default : ' true'
@@ -146,19 +154,66 @@ jobs:
146154 REPORT_COUNT=$(ls reports/*.md 2>/dev/null | grep -v "00-summary" | wc -l || echo "0")
147155 echo "sdk_reports=$REPORT_COUNT" >> $GITHUB_OUTPUT
148156
# Groups the per-commit reports in ./reports into clusters under ./clustered
# and publishes the number of cluster files as the `clusters_created` output.
#
# The condition renders the input through format() before comparing: a
# boolean input is not equal to the string 'false' under the expression
# language's loose comparison, so a direct `!= 'false'` test could never
# switch clustering off. An unset input renders as an empty string and keeps
# clustering enabled.
- name: Cluster similar reports
  id: cluster
  if: steps.analyze.outputs.sdk_reports > 1 && format('{0}', inputs.enable_clustering) != 'false'
  env:
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    # Expression values are handed over as environment variables instead of
    # being spliced into the script text.
    SDK_REPORTS: ${{ steps.analyze.outputs.sdk_reports }}
    USE_CLAUDE_CLUSTERING: ${{ inputs.use_claude_clustering }}
  run: |
    echo "=== Clustering Reports ==="
    echo "Reports to cluster: $SDK_REPORTS"

    mkdir -p clustered

    # Build clustering arguments as an array so each argument stays one word
    cluster_args=(--input-dir ./reports --output-dir ./clustered)

    # Use Claude only when it was requested AND the API key is available
    if [ "$USE_CLAUDE_CLUSTERING" = "true" ] && [ -n "$ANTHROPIC_API_KEY" ]; then
      echo "Claude enhancement enabled"
      pip install anthropic --quiet
      cluster_args+=(--use-claude)
    elif [ "$USE_CLAUDE_CLUSTERING" = "true" ]; then
      echo "Warning: Claude clustering requested but ANTHROPIC_API_KEY not set"
      echo "Falling back to heuristic clustering"
    fi

    echo "Running: python3 scripts/cluster_sdk_reports.py ${cluster_args[*]}"
    # NOTE(review): the status of this pipeline is tee's unless pipefail is
    # enabled for the step's shell, so a failing clustering script may not
    # fail the step. Confirm whether that best-effort behaviour is intended.
    python3 scripts/cluster_sdk_reports.py "${cluster_args[@]}" 2>&1 | tee clustering.log

    # Count clusters; find prints nothing when there are no matches, so the
    # count is a plain 0 without needing a fallback.
    CLUSTER_COUNT=$(find clustered -maxdepth 1 -name 'cluster-*.md' | wc -l)
    echo "clusters_created=$CLUSTER_COUNT" >> "$GITHUB_OUTPUT"

    # Guard on the file that is actually printed, so a missing summary cannot
    # make cat fail the step.
    if [ -f clustered/00-clusters.md ]; then
      echo ""
      echo "=== Cluster Summary ==="
      cat clustered/00-clusters.md
    fi
193+
# Publishes the contents of reports/ as a build artifact named after the
# run number, kept for 30 days.
- name: Upload analysis reports
  uses: actions/upload-artifact@v4
  with:
    path: reports/
    name: sdk-diff-reports-${{ github.run_number }}
    retention-days: 30
155200
# Publishes the contents of clustered/ as a build artifact, but only when the
# cluster step reported at least one cluster.
- name: Upload clustered reports
  uses: actions/upload-artifact@v4
  if: steps.cluster.outputs.clusters_created > 0
  with:
    path: clustered/
    name: sdk-clustered-reports-${{ github.run_number }}
    retention-days: 30
208+
# Publishes the analyzer and clustering logs regardless of the outcome of
# earlier steps (always()); log files that do not exist are ignored.
- name: Upload analyzer log
  uses: actions/upload-artifact@v4
  if: always()
  with:
    name: analyzer-log-${{ github.run_number }}
    if-no-files-found: ignore
    retention-days: 7
    path: |
      analyzer.log
      clustering.log
164219
@@ -187,15 +242,16 @@ jobs:
# Writes the results table to the job summary, whatever the outcome of the
# earlier steps (always()).
#
# The here-document uses a quoted delimiter: workflow expressions are
# substituted into the script before the shell runs, so no shell expansion is
# needed inside the table. Quoting keeps any `$` or backtick in a substituted
# value literal and lets the Markdown backticks be written unescaped.
- name: Job summary
  if: always()
  run: |
    cat >> "$GITHUB_STEP_SUMMARY" << 'EOF'
    ## SDK Diff Analyzer Results

    | Parameter | Value |
    |-----------|-------|
    | Mode | ${{ steps.range.outputs.mode }} |
    | Since | `${{ steps.range.outputs.since || 'N/A' }}` |
    | gdc-nas HEAD | `${{ steps.gdc_nas.outputs.head_sha }}` |
    | SDK-relevant commits | ${{ steps.analyze.outputs.sdk_reports }} |
    | Clusters created | ${{ steps.cluster.outputs.clusters_created || '0' }} |

    EOF
201257
@@ -204,3 +260,10 @@ jobs:
204260 echo "" >> $GITHUB_STEP_SUMMARY
205261 cat reports/00-summary.md >> $GITHUB_STEP_SUMMARY
206262 fi
263+
# Append the clustering overview to the job summary when the file exists.
if [ -f clustered/00-clusters.md ]; then
  {
    echo ""
    echo "### Clustering Summary"
    echo ""
    cat clustered/00-clusters.md
  } >> $GITHUB_STEP_SUMMARY
fi
0 commit comments