Skip to content

Commit ad159d3

Browse files
committed
Measure overlay-base database size at the clear cleanup level
This lets us compare the storage cost of overlay-base and trimmed databases for the same commit.
1 parent 9cea582 commit ad159d3

2 files changed

Lines changed: 133 additions & 0 deletions

File tree

lib/entry-points.js

Lines changed: 43 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/database-upload.ts

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,19 @@ export interface DatabaseUploadResult {
2525
zipped_upload_size_bytes?: number;
2626
/** Whether the uploaded database is an overlay base. */
2727
is_overlay_base?: boolean;
28+
/**
29+
* For overlay-base uploads only: the size in bytes that the zipped database
30+
* would have been if it had been cleaned at the `clear` cleanup level instead
31+
* of the `overlay` level.
32+
*/
33+
clear_cleanup_zipped_size_bytes?: number;
34+
/**
35+
* For overlay-base uploads only: the time in milliseconds spent measuring the
36+
* `clear` cleanup size (cleaning up the cluster at the `clear` level and
37+
* bundling each database). This is a cluster-wide measurement, so it is the
38+
* same for every language in a run.
39+
*/
40+
clear_cleanup_measurement_duration_ms?: number;
2841
/** Time taken to upload database in milliseconds. */
2942
upload_duration_ms?: number;
3043
/** If there was an error during database upload, this is its message. */
@@ -156,9 +169,86 @@ export async function cleanupAndUploadDatabases(
156169
});
157170
}
158171
}
172+
173+
// When we upload an overlay-base database, we cleaned the databases at the `overlay` level, which
174+
// retains more data than the `clear` level used for regular uploads. Measure what the zipped size
175+
// would have been at the `clear` level too, so we can compare the storage cost of overlay-base
176+
// databases against regular databases for the same repository.
177+
//
178+
// We skip this in debug mode, where the databases are preserved and uploaded as debug artifacts,
179+
// since cleaning them up at the `clear` level would discard data that is useful for debugging.
180+
if (shouldUploadOverlayBase && !config.debugMode) {
181+
await withGroupAsync(
182+
"Measuring database size at the clear cleanup level",
183+
() => recordClearCleanupSizes(codeql, config, reports, logger),
184+
);
185+
}
186+
159187
return reports;
160188
}
161189

190+
/**
 * Cleans up the databases at the `clear` cleanup level and records the resulting zipped size for
 * each language in `clear_cleanup_zipped_size_bytes`, along with the time spent taking the
 * measurement in `clear_cleanup_measurement_duration_ms`.
 *
 * This mutates the entries of `reports` in place. It must run only after all overlay-base uploads
 * have completed, since the `clear` cleanup discards overlay data that the uploaded database
 * depends on.
 *
 * Failures here are non-fatal: this is telemetry-only, so we log and move on rather than failing
 * the workflow.
 *
 * @param codeql CodeQL instance used to clean up the cluster and to bundle each database.
 * @param config Configuration whose `languages` are measured.
 * @param reports Upload reports to annotate. Languages without a matching report are skipped.
 * @param logger Logger that receives the per-language debug output and any failure warnings.
 */
async function recordClearCleanupSizes(
  codeql: CodeQL,
  config: Config,
  reports: DatabaseUploadResult[],
  logger: Logger,
): Promise<void> {
  const startTime = performance.now();
  try {
    try {
      await codeql.databaseCleanupCluster(config, CleanupLevel.Clear);
    } catch (e) {
      logger.warning(
        `Failed to clean up databases at the '${CleanupLevel.Clear}' level for ` +
          `size measurement: ${util.getErrorMessage(e)}`,
      );
      // Without a successful `clear` cleanup, any size we measured would not reflect that level.
      return;
    }

    for (const language of config.languages) {
      const report = reports.find((r) => r.language === language);
      if (report === undefined) {
        continue;
      }
      try {
        const bundledDb = await bundleDb(config, language, codeql, language, {
          includeDiagnostics: false,
        });
        const clearSizeBytes = fs.statSync(bundledDb).size;
        report.clear_cleanup_zipped_size_bytes = clearSizeBytes;

        // `zipped_upload_size_bytes` is optional on the report, so only print the comparison when
        // it was actually recorded instead of logging "undefined bytes".
        const overlaySizeBytes = report.zipped_upload_size_bytes;
        const comparison =
          overlaySizeBytes === undefined
            ? ""
            : ` (vs. ${overlaySizeBytes} bytes at the ` +
              `'${CleanupLevel.Overlay}' level)`;
        logger.debug(
          `Database for ${language} is ` +
            `${clearSizeBytes} bytes zipped at the ` +
            `'${CleanupLevel.Clear}' cleanup level${comparison}.`,
        );
      } catch (e) {
        // A failure for one language must not prevent measuring the remaining languages.
        logger.warning(
          `Failed to measure the '${CleanupLevel.Clear}' cleanup database size ` +
            `for ${language}: ${util.getErrorMessage(e)}`,
        );
      }
    }
  } finally {
    // The duration covers the whole measurement (cleanup plus bundling), so the same value is
    // stamped on every report, including when the cleanup failed and we returned early.
    const durationMs = performance.now() - startTime;
    for (const report of reports) {
      report.clear_cleanup_measurement_duration_ms = durationMs;
    }
  }
}
251+
162252
/**
163253
* Uploads a bundled database to the GitHub API.
164254
*

0 commit comments

Comments
 (0)