@@ -25,6 +25,19 @@ export interface DatabaseUploadResult {
2525 zipped_upload_size_bytes ?: number ;
2626 /** Whether the uploaded database is an overlay base. */
2727 is_overlay_base ?: boolean ;
28+ /**
29+ * For overlay-base uploads only: the size in bytes that the zipped database
30+ * would have been if it had been cleaned at the `clear` cleanup level instead
31+ * of the `overlay` level.
32+ */
33+ clear_cleanup_zipped_size_bytes ?: number ;
34+ /**
35+ * For overlay-base uploads only: the time in milliseconds spent measuring the
36+ * `clear` cleanup size (cleaning up the cluster at the `clear` level and
37+ * bundling each database). This is a cluster-wide measurement, so it is the
38+ * same for every language in a run.
39+ */
40+ clear_cleanup_measurement_duration_ms ?: number ;
2841 /** Time taken to upload database in milliseconds. */
2942 upload_duration_ms ?: number ;
3043 /** If there was an error during database upload, this is its message. */
@@ -156,9 +169,86 @@ export async function cleanupAndUploadDatabases(
156169 } ) ;
157170 }
158171 }
172+
173+ // When we upload an overlay-base database, we cleaned the databases at the `overlay` level, which
174+ // retains more data than the `clear` level used for regular uploads. Measure what the zipped size
175+ // would have been at the `clear` level too, so we can compare the storage cost of overlay-base
176+ // databases against regular databases for the same repository.
177+ //
178+ // We skip this in debug mode, where the databases are preserved and uploaded as debug artifacts,
179+ // since cleaning them up at the `clear` level would discard data that is useful for debugging.
180+ if ( shouldUploadOverlayBase && ! config . debugMode ) {
181+ await withGroupAsync (
182+ "Measuring database size at the clear cleanup level" ,
183+ ( ) => recordClearCleanupSizes ( codeql , config , reports , logger ) ,
184+ ) ;
185+ }
186+
159187 return reports ;
160188}
161189
/**
 * Measures how large each zipped database would be at the `clear` cleanup level.
 *
 * The cluster is first cleaned at the `clear` level. Then, for every configured language that has
 * an entry in `reports`, the database is bundled (without diagnostics) and the size of the bundle
 * is stored in that entry's `clear_cleanup_zipped_size_bytes`. Regardless of the outcome, the
 * elapsed time of the whole measurement is written to `clear_cleanup_measurement_duration_ms` on
 * every entry of `reports`.
 *
 * The entries of `reports` are modified in place. All overlay-base uploads must have finished
 * before this is called, because the `clear` cleanup discards overlay data that the uploaded
 * database depends on.
 *
 * This is telemetry only: cleanup and bundling failures are logged as warnings and never fail the
 * workflow.
 *
 * @param codeql CodeQL instance used to clean up the cluster and bundle the databases.
 * @param config Configuration providing the languages to measure.
 * @param reports Per-language upload reports to annotate with the measurements.
 * @param logger Logger that receives debug output and warnings.
 */
async function recordClearCleanupSizes(
  codeql: CodeQL,
  config: Config,
  reports: DatabaseUploadResult[],
  logger: Logger,
): Promise<void> {
  const measurementStart = performance.now();
  try {
    // Without a successful `clear` cleanup there is nothing meaningful to measure.
    let clusterCleaned = true;
    try {
      await codeql.databaseCleanupCluster(config, CleanupLevel.Clear);
    } catch (cleanupError) {
      clusterCleaned = false;
      logger.warning(
        `Failed to clean up databases at the '${CleanupLevel.Clear}' level for ` +
          `size measurement: ${util.getErrorMessage(cleanupError)}`,
      );
    }

    if (clusterCleaned) {
      for (const language of config.languages) {
        // Only languages that already have an upload report are measured.
        const target = reports.find(
          (candidate) => candidate.language === language,
        );
        if (target !== undefined) {
          try {
            const archivePath = await bundleDb(
              config,
              language,
              codeql,
              language,
              { includeDiagnostics: false },
            );
            const clearSizeBytes = fs.statSync(archivePath).size;
            target.clear_cleanup_zipped_size_bytes = clearSizeBytes;
            logger.debug(
              `Database for ${language} is ${clearSizeBytes} bytes zipped at the ` +
                `'${CleanupLevel.Clear}' cleanup level ` +
                `(vs. ${target.zipped_upload_size_bytes} bytes at the ` +
                `'${CleanupLevel.Overlay}' level).`,
            );
          } catch (bundleError) {
            // A failure for one language must not prevent measuring the others.
            logger.warning(
              `Failed to measure the '${CleanupLevel.Clear}' cleanup database size ` +
                `for ${language}: ${util.getErrorMessage(bundleError)}`,
            );
          }
        }
      }
    }
  } finally {
    // The measurement is cluster-wide, so every report receives the same duration, even when the
    // cleanup or a bundling step failed.
    const elapsedMs = performance.now() - measurementStart;
    reports.forEach((entry) => {
      entry.clear_cleanup_measurement_duration_ms = elapsedMs;
    });
  }
}
251+
162252/**
163253 * Uploads a bundled database to the GitHub API.
164254 *
0 commit comments