diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0166bb6b1c..b8753335aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,6 +44,8 @@ jobs: - name: Clone repository uses: actions/checkout@v5 + # TODO: restore `cargo test` once per-feature tests are compatible with + # both server-side and client-side compilation modes. - name: Check feature ${{ matrix.feature }} run: cargo check --no-default-features --features ${{ matrix.feature }} diff --git a/docs/Architecture.md b/docs/Architecture.md index adc484334e..8eb044f05e 100644 --- a/docs/Architecture.md +++ b/docs/Architecture.md @@ -1,21 +1,166 @@ # Sccache high level architecture -This schema shows at high level how sccache works. +Sccache supports two compilation modes: **server-side compilation** (legacy) and **client-side compilation** (new). The mode is controlled by the `SCCACHE_CLIENT_SIDE_COMPILE` environment variable. +## Server-Side Compilation (Legacy Mode) + +This is the default mode when `SCCACHE_CLIENT_SIDE_COMPILE` is unset or set to `0`. + +In this mode, the server performs all compilation work: + +```mermaid + sequenceDiagram + participant Client as Client Process + participant Server as Sccache Server + participant Storage as Cache Storage + + Client->>Server: 1. Compile Request (exe, args, cwd, env) + Server->>Server: 2. Detect Compiler + Server->>Server: 3. Preprocess & Hash + Server->>Storage: 4. Check Cache + alt Cache Lookup Result: Hit + rect rgba(0, 128, 0, 0.1) + Storage-->>Server: Cached Entry + Server-->>Client: 5a. Return Cached Result + end + else Hit: No - Cache Miss + rect rgba(200, 0, 0, 0.1) + Storage-->>Server: Miss + Server->>Server: 5b. Compile Locally + Server->>Storage: 6. Store Result + Server-->>Client: 7. 
Return Result + end + end +``` + +**Characteristics**: +- Server performs compiler detection, preprocessing, hash generation, and compilation +- All work happens on the server machine +- Server can become a bottleneck with many parallel clients +- Higher server CPU and memory usage + +## Client-Side Compilation (New Mode) + +Enabled by setting `SCCACHE_CLIENT_SIDE_COMPILE=1`. + +In this mode, the client performs compilation work and the server acts as pure storage: + +```mermaid + sequenceDiagram + participant Client as Client Process + participant Server as Sccache Server (Storage Only) + participant Storage as Cache Storage + + Client->>Client: 1. Detect Compiler + Client->>Client: 2. Preprocess & Hash + Client->>Server: 3. CacheGet Request (cache_key) + Server->>Storage: 4. Query Storage + alt Cache Lookup Result: Hit + rect rgba(0, 128, 0, 0.1) + Storage-->>Server: Cached Entry + Server-->>Client: 5a. Return Cache Entry + Client->>Client: Use Cached Result + end + else Hit: No - Cache Miss + rect rgba(200, 0, 0, 0.1) + Storage-->>Server: Miss + Server-->>Client: 5b. Cache Miss + Client->>Client: 6. Compile Locally + Client->>Server: 7. CachePut Request (cache_key, entry) + Server->>Storage: 8. Store in Cache + end + end +``` + +**Characteristics**: +- Client performs compiler detection, preprocessing, hash generation, and compilation +- Server only handles cache storage operations (get/put) +- Work is distributed across all clients (better scalability) +- Lower server CPU and memory usage +- Reduced network latency (single request instead of multiple round trips) + +**Why this is fast**: preprocessing in client-side mode is cheap — it only concatenates source files rather than running the full C/C++ preprocessor. This avoids the expensive `#include` expansion and macro evaluation that dominates traditional preprocessing time, making it practical to move this work to the client without a performance penalty. 
+ +**Note**: Client-side compilation is functional but considered experimental. Enable it by setting `SCCACHE_CLIENT_SIDE_COMPILE=1`. + +## Comparison + +| Aspect | Server-Side (Legacy) | Client-Side (New) | +|--------|---------------------|-------------------| +| Compiler Detection | Server | Client (with caching) | +| Preprocessing | Server | Client | +| Hash Generation | Server | Client | +| Compilation | Server | Client | +| Server Role | Full compilation service | Pure storage service | +| Server CPU Usage | High | Low | +| Server Memory Usage | Moderate | Low | +| Client Overhead | Low | Moderate | +| Scalability | Limited by server | Excellent | +| Network Requests | Multiple round trips | Single request | + +## Cache Key Generation + +Regardless of the mode, cache keys are generated from: ```mermaid flowchart LR id1[[Environment variables]] --> hash id2[[Compiler binary]] --> hash id3[[Compiler arguments]] --> hash - id5[[Files]] --> | | hash - Compile --> Upload - Storage[(Storage)] --> | yes | Download - hash([hash]) --> | exists? | Storage - Storage --> | no | Compile - Upload --> Storage + id5[[Preprocessed Files]] --> hash + hash([BLAKE3 Hash]) --> key[Cache Key] + + style id1 fill:#e8f4fd,stroke:#5dade2,color:#333 + style id2 fill:#e8f4fd,stroke:#5dade2,color:#333 + style id3 fill:#e8f4fd,stroke:#5dade2,color:#333 + style id5 fill:#e8f4fd,stroke:#5dade2,color:#333 + style hash fill:#eafaf1,stroke:#58d68d,color:#333 + style key fill:#fef9e7,stroke:#f4d03f,color:#333 ``` +### C/C++ vs Rust + +The "preprocessing" step differs significantly between languages: + +- **C/C++**: runs the compiler's preprocessor (`gcc -E` / `clang -E`) to expand all `#include` directives and macros, producing a single translation unit. The preprocessed output is then hashed. This is the expensive part — include expansion can pull in thousands of header files. + +- **Rust**: there is no preprocessor. 
Instead, sccache runs `rustc --emit dep-info`, a lightweight invocation that outputs a `.d` file listing all source files and environment variables the crate depends on — without compiling anything. Sccache then hashes each source file individually, along with extern crate `.rlib` files, static libraries, and any target JSON file. This dependency discovery step is fast compared to full compilation.
+
+In client-side mode, this work moves to the client. For Rust, the cost is minimal since `--emit dep-info` is cheap. For C/C++, preprocessing is replaced by simple file concatenation, avoiding the expensive include expansion entirely.
+
+For more details about how hash generation works, see [the caching documentation](Caching.md).
+
+## Protocol
+
+### Server-Side Mode Protocol
+
+- **Request**: `Compile(Compile)` - Contains executable path, arguments, working directory, environment variables
+- **Response**: `CompileFinished(CompileFinished)` - Contains exit code, stdout, stderr, and output file paths
+
+### Client-Side Mode Protocol
+
+- **Request**: `CacheGet(CacheGetRequest)` - Contains the cache key and the paths where output artifacts should be extracted on a hit
+- **Response**: `CacheGetResponse::Hit { stdout, stderr }` - Cache hit; the server has already extracted the artifacts to the requested paths, so only stdout/stderr cross the IPC channel
+- **Response**: `CacheGetResponse::Miss` - Cache miss
+- **Response**: `CacheGetResponse::Error(String)` - Lookup or extraction failed; the client treats this as a miss
+- **Request**: `CachePut(CachePutRequest)` - Contains the cache key, the output artifact paths the server should read from the shared filesystem, and the compiler's stdout/stderr
+- **Response**: `CachePutResponse::Success(Duration)` - Entry stored; carries the storage duration
+- **Response**: `CachePutResponse::Error(String)` - Storage failed (best-effort; not fatal to the client)
+
+The protocol stays backward compatible during the migration from server-side to client-side mode: the new requests and responses are additive enum variants, so v1 clients and servers continue to interoperate without any explicit version negotiation.
+ +## Storage Backends + +Both modes use the same cache storage backends: + +- **Local Disk** (`SCCACHE_DIR`) +- **S3 Compatible** (`SCCACHE_BUCKET`, `SCCACHE_ENDPOINT`) +- **Redis** (`SCCACHE_REDIS_ENDPOINT`) +- **Memcached** (`SCCACHE_MEMCACHED_ENDPOINT`) +- **Google Cloud Storage** (`SCCACHE_GCS_BUCKET`) +- **Azure Blob Storage** (`SCCACHE_AZURE_CONNECTION_STRING`) +- **GitHub Actions Cache** (`SCCACHE_GHA_CACHE_URL`) +- **WebDAV** (`SCCACHE_WEBDAV_ENDPOINT`) +- **Alibaba Cloud OSS** (`SCCACHE_OSS_BUCKET`) +- **Tencent Cloud COS** (`SCCACHE_COS_BUCKET`) -For more details about hash generation works, see [the caching documentation](Caching.md). +See [Configuration.md](Configuration.md) for storage backend configuration details. diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 906c69f5f2..210b73cfae 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -297,7 +297,7 @@ impl Storage for RemoteStorage { pub fn build_single_cache( cache_type: &CacheType, basedirs: &[Vec], - _pool: &tokio::runtime::Handle, + pool: &tokio::runtime::Handle, ) -> Result> { match cache_type { #[cfg(feature = "azure")] @@ -330,6 +330,7 @@ pub fn build_single_cache( service_account.as_deref(), (*rw_mode).into(), credential_url.as_deref(), + pool, ) .map_err(|err| anyhow!("create gcs cache failed: {err:?}"))?; let storage = RemoteStorage::new(operator, basedirs.to_vec()); diff --git a/src/cache/cache_io.rs b/src/cache/cache_io.rs index c4c2fa1347..1c54d82c53 100644 --- a/src/cache/cache_io.rs +++ b/src/cache/cache_io.rs @@ -13,6 +13,7 @@ use super::utils::{get_file_mode, set_file_mode}; use crate::errors::*; use fs_err as fs; +use serde::{Deserialize, Serialize}; use std::fmt; use std::io::{Cursor, Read, Seek, Write}; use std::path::PathBuf; @@ -21,7 +22,7 @@ use zip::write::FileOptions; use zip::{CompressionMethod, ZipArchive, ZipWriter}; /// Cache object sourced by a file. 
-#[derive(Clone)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct FileObjectSource { /// Identifier for this object. Should be unique within a compilation unit. /// Note that a compilation unit is a single source file in C/C++ and a crate in Rust. @@ -172,6 +173,9 @@ impl CacheRead { /// Data to be stored in the compiler cache. pub struct CacheWrite { zip: ZipWriter>>, + /// Pre-built zip bytes (from `from_bytes`). When set, `finish()` returns + /// these bytes directly instead of finalising the ZipWriter. + prebuilt: Option>, } impl CacheWrite { @@ -179,6 +183,16 @@ impl CacheWrite { pub fn new() -> CacheWrite { CacheWrite { zip: ZipWriter::new(Cursor::new(vec![])), + prebuilt: None, + } + } + + /// Create a cache entry from pre-serialized bytes received over the wire. + /// `finish()` will return these bytes unchanged. + pub fn from_bytes(data: Vec) -> CacheWrite { + CacheWrite { + zip: ZipWriter::new(Cursor::new(vec![])), + prebuilt: Some(data), } } @@ -257,7 +271,10 @@ impl CacheWrite { /// Finish writing data to the cache entry writer, and return the data. 
pub fn finish(self) -> Result> { - let CacheWrite { mut zip } = self; + let CacheWrite { mut zip, prebuilt } = self; + if let Some(bytes) = prebuilt { + return Ok(bytes); + } let cur = zip.finish().context("Failed to finish cache entry zip")?; Ok(cur.into_inner()) } diff --git a/src/cache/gcs.rs b/src/cache/gcs.rs index f3d742cd4c..6a1ac35337 100644 --- a/src/cache/gcs.rs +++ b/src/cache/gcs.rs @@ -45,6 +45,7 @@ impl GCSCache { service_account: Option<&str>, rw_mode: CacheMode, credential_url: Option<&str>, + pool: &tokio::runtime::Handle, ) -> Result { let mut builder = Gcs::default() .bucket(bucket) @@ -64,11 +65,9 @@ impl GCSCache { .map_err(|err| anyhow!("gcs credential url is invalid: {err:?}"))?; // For TaskCluster integration, fetch token directly and provide it to OpenDAL - let token = tokio::task::block_in_place(|| { - tokio::runtime::Handle::current() - .block_on(fetch_taskcluster_token(cred_url, rw_to_scope(rw_mode))) - }) - .map_err(|e| anyhow!("Failed to fetch TaskCluster token: {e}"))?; + let token = pool + .block_on(fetch_taskcluster_token(cred_url, rw_to_scope(rw_mode))) + .map_err(|e| anyhow!("Failed to fetch TaskCluster token: {e}"))?; builder = builder.token(token); } @@ -93,7 +92,10 @@ async fn fetch_taskcluster_token(url: &str, scope: &str) -> Result { debug!("gcs: start to load token from: {}", url); let user_agent = format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); - let client = Client::builder().user_agent(user_agent).build()?; + let client = Client::builder() + .user_agent(user_agent) + .timeout(std::time::Duration::from_secs(30)) + .build()?; let res = client.get(url).send().await?; if res.status().is_success() { diff --git a/src/commands.rs b/src/commands.rs index 5d8a838483..903d5f3670 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -12,16 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::cache::storage_from_config; +use crate::cache::{Cache, CacheWrite, FileObjectSource, Storage, storage_from_config}; use crate::client::{ServerConnection, connect_to_server, connect_with_retry}; use crate::cmdline::{Command, StatsFormat}; -use crate::compiler::ColorMode; +use crate::compiler::{CacheControl, Cacheable, ColorMode, CompilerArguments, get_compiler_info}; use crate::config::{Config, default_disk_cache_dir}; use crate::jobserver::Client; use crate::mock_command::{CommandChild, CommandCreatorSync, ProcessCommandCreator, RunCommand}; -use crate::protocol::{Compile, CompileFinished, CompileResponse, Request, Response}; +use crate::protocol::{ + CacheGetRequest, CacheGetResponse, CachePutRequest, Compile, CompileFinished, CompileResponse, + Request, Response, +}; use crate::server::{self, DistInfo, ServerInfo, ServerStartup, ServerStats}; -use crate::util::daemonize; +use crate::util::{daemonize, run_input_output}; +use async_trait::async_trait; use byteorder::{BigEndian, ByteOrder}; use fs::{File, OpenOptions}; use fs_err as fs; @@ -33,6 +37,7 @@ use std::io::{self, IsTerminal, Write}; use std::os::unix::process::ExitStatusExt; use std::path::Path; use std::process; +use std::sync::Arc; use std::time::Duration; use strip_ansi_escapes::Writer; use tokio::io::AsyncReadExt; @@ -606,12 +611,221 @@ where { trace!("do_compile"); let exe_path = which_in(exe, path, cwd)?; + + if env::var("SCCACHE_CLIENT_SIDE_COMPILE") + .map(|v| v == "1") + .unwrap_or(false) + { + return do_compile_client_side( + creator, runtime, conn, &exe_path, cmdline, cwd, env_vars, stdout, stderr, + ); + } + let res = request_compile(&mut conn, &exe_path, &cmdline, cwd, env_vars)?; handle_compile_response( creator, runtime, &mut conn, res, &exe_path, cmdline, cwd, stdout, stderr, ) } +/// A no-op `Storage` implementation used on the client side solely to satisfy +/// the `generate_hash_key` API, which needs a storage reference for preprocessor +/// cache lookups. 
All operations return Miss / success without touching any +/// actual storage backend; real cache I/O goes through the server connection. +struct NoopStorage; + +#[async_trait] +impl Storage for NoopStorage { + async fn get(&self, _key: &str) -> Result { + Ok(Cache::Miss) + } + + async fn put(&self, _key: &str, _entry: CacheWrite) -> Result { + Ok(std::time::Duration::default()) + } + + fn location(&self) -> String { + "noop".to_string() + } + + async fn current_size(&self) -> Result> { + Ok(None) + } + + async fn max_size(&self) -> Result> { + Ok(None) + } +} + +/// Perform client-side compilation with the server acting as a pure cache store. +/// +/// Steps: +/// 1. Detect the compiler. +/// 2. Parse the compiler arguments. +/// 3. Run the preprocessor locally and compute the cache hash key. +/// 4. Ask the server for the cache entry (`CacheGet`). +/// 5a. Cache hit → extract output artifacts and write stdout/stderr. +/// 5b. Cache miss → compile locally, store the result (`CachePut`), write output. +#[allow(clippy::too_many_arguments)] +fn do_compile_client_side( + creator: T, + runtime: &mut Runtime, + mut conn: ServerConnection, + exe_path: &Path, + cmdline: Vec, + cwd: &Path, + env_vars: Vec<(OsString, OsString)>, + stdout: &mut dyn Write, + stderr: &mut dyn Write, +) -> Result +where + T: CommandCreatorSync, +{ + trace!("do_compile_client_side"); + let pool = runtime.handle().clone(); + + // Step 1: Detect compiler. + let (compiler, _proxy) = runtime.block_on(get_compiler_info( + creator.clone(), + exe_path, + cwd, + &cmdline, + &env_vars, + &pool, + None, + ))?; + + // Step 2: Parse arguments. + let mut hasher = match compiler.parse_arguments(&cmdline, cwd, &env_vars) { + CompilerArguments::Ok(h) => h, + CompilerArguments::NotCompilation | CompilerArguments::CannotCache(_, _) => { + // Not a compilation or un-cacheable — run the compiler directly. 
+ let mut cmd = creator.clone().new_command_sync(exe_path); + cmd.args(&cmdline).current_dir(cwd); + let status = runtime.block_on(async move { + let child = cmd.spawn().await?; + child.wait().await.context("failed to wait for compiler") + })?; + return Ok(status.code().unwrap_or(-1)); + } + }; + + // Step 3: Preprocess locally and generate the hash key. + // NoopStorage satisfies the API; preprocessor cache operations are ignored. + let noop_storage = Arc::new(NoopStorage) as Arc; + let hash_result = runtime.block_on(hasher.generate_hash_key( + &creator, + cwd.to_path_buf(), + env_vars.clone(), + false, // may_dist + &pool, + false, // rewrite_includes_only + noop_storage, + CacheControl::Default, + ))?; + + let key = hash_result.key; + let compilation = hash_result.compilation; + + // Collect the expected output paths once (relative → absolute). + let outputs: Vec = compilation + .outputs() + .map(|o| FileObjectSource { + path: cwd.join(&o.path), + ..o + }) + .collect(); + + // Step 4: Ask the server for a cached result. + // The server extracts artifacts directly to `outputs` paths on a hit, so + // only stdout/stderr cross the IPC channel – no large data transfer. + let cache_response = conn.request(Request::CacheGet(CacheGetRequest { + key: key.clone(), + output_paths: outputs.clone(), + }))?; + + match cache_response { + // ── Step 5a: Cache hit ────────────────────────────────────────────── + Response::CacheGet(CacheGetResponse::Hit { + stdout: cached_stdout, + stderr: cached_stderr, + }) => { + debug!("client-side cache hit for key {}", key); + // Artifacts already extracted by the server; just forward stdio. 
+ stdout.write_all(&cached_stdout)?; + stderr.write_all(&cached_stderr)?; + Ok(0) + } + + // ── Step 5b: Cache miss or server-side error ──────────────────────── + cache_miss_or_error => { + match &cache_miss_or_error { + Response::CacheGet(CacheGetResponse::Miss) => { + debug!("client-side cache miss for key {}", key); + } + Response::CacheGet(CacheGetResponse::Error(msg)) => { + debug!( + "client-side cache error for key {} (treating as miss): {}", + key, msg + ); + } + other => bail!( + "unexpected response from server for CacheGet (key={}): {:?}", + key, + other + ), + } + + // Build the concrete compile command. + let mut path_transformer = crate::dist::PathTransformer::new(); + let (compile_cmd, _, cacheable) = compilation + .generate_compile_commands(&mut path_transformer, true) + .context("failed to generate compile commands")?; + + // Run the compiler. + let compile_exe = compile_cmd.get_executable(); + let compile_args = compile_cmd.get_arguments(); + let compile_env = compile_cmd.get_env_vars(); + let compile_cwd = compile_cmd.get_cwd(); + + let mut cmd = creator.clone().new_command_sync(&compile_exe); + cmd.args(&compile_args) + .env_clear() + .envs(compile_env) + .current_dir(&compile_cwd); + + let output = runtime.block_on(run_input_output(cmd, None))?; + let exit_code = output.status.code().unwrap_or(-1); + + // If compilation succeeded and the result is cacheable, store it. + // The server reads the output files from disk directly. + if output.status.success() && cacheable == Cacheable::Yes { + // Best-effort: log but don't fail on cache-put errors. 
+ match conn.request(Request::CachePut(CachePutRequest { + key: key.clone(), + output_paths: outputs, + stdout: output.stdout.clone(), + stderr: output.stderr.clone(), + })) { + Ok(Response::CachePut(crate::protocol::CachePutResponse::Error(msg))) => { + debug!( + "server failed to store cache entry for key {}: {}", + key, msg + ); + } + Err(e) => { + debug!("failed to store cache entry for key {}: {:#}", key, e); + } + _ => {} + } + } + + stdout.write_all(&output.stdout)?; + stderr.write_all(&output.stderr)?; + Ok(exit_code) + } + } +} + /// Run `cmd` and return the process exit status. pub fn run_command(cmd: Command) -> Result { // Config isn't required for all commands, but if it's broken then we should flag diff --git a/src/protocol.rs b/src/protocol.rs index 8760ba679f..a200a828f7 100644 --- a/src/protocol.rs +++ b/src/protocol.rs @@ -1,8 +1,32 @@ -use crate::compiler::ColorMode; +use crate::cache::FileObjectSource; +use crate::compiler::{ColorMode, PreprocessorCacheEntry}; use crate::server::{DistInfo, ServerInfo}; use serde::{Deserialize, Serialize}; use std::ffi::OsString; +/// Protocol version for backward compatibility tracking. +/// +/// Version 1: Original protocol with Compile request (server-side compilation) +/// Version 2: Extended protocol with CacheGet/CachePut (client-side compilation) +/// +/// The protocol is backward compatible through enum variants: +/// - Old clients (v1) send only: ZeroStats, GetStats, DistStatus, Shutdown, Compile +/// - New clients (v2) can send all requests including: CacheGet, CachePut, etc. 
+/// - Old servers (v1) handle: ZeroStats, GetStats, DistStatus, Shutdown, Compile +/// - New servers (v2) handle all requests +/// +/// Compatibility matrix: +/// - Old client + Old server: Works (v1 protocol) +/// - Old client + New server: Works (server supports v1 requests) +/// - New client + Old server: Client must fall back to Compile for cache operations +/// - New client + New server: Works optimally (v2 protocol with client-side compilation) +#[allow(dead_code)] +pub const PROTOCOL_VERSION: u32 = 2; + +/// Legacy protocol version (server-side compilation only) +#[allow(dead_code)] +pub const PROTOCOL_VERSION_1: u32 = 1; + /// A client request. #[derive(Serialize, Deserialize, Debug)] pub enum Request { @@ -16,6 +40,14 @@ pub enum Request { Shutdown, /// Execute a compile or fetch a cached compilation result. Compile(Compile), + /// Get a cache entry by key. + CacheGet(CacheGetRequest), + /// Store a cache entry by key. + CachePut(CachePutRequest), + /// Get a preprocessor cache entry. + PreprocessorCacheGet(String), + /// Store a preprocessor cache entry. + PreprocessorCachePut(PreprocessorCachePutRequest), } /// A server response. @@ -33,6 +65,14 @@ pub enum Response { ShuttingDown(Box), /// Second response for `Request::Compile`, containing the results of the compilation. CompileFinished(CompileFinished), + /// Response for `Request::CacheGet`. + CacheGet(CacheGetResponse), + /// Response for `Request::CachePut`. + CachePut(CachePutResponse), + /// Response for `Request::PreprocessorCacheGet`. + PreprocessorCacheGet(Option), + /// Response for `Request::PreprocessorCachePut`. + PreprocessorCachePut, } /// Possible responses from the server for a `Compile` request. @@ -73,3 +113,95 @@ pub struct Compile { /// The environment variables present when the compiler was executed, as (var, val). pub env_vars: Vec<(OsString, OsString)>, } + +/// Request to get a cache entry by key. 
+/// +/// The server extracts output artifacts directly to `output_paths` on a hit, +/// so no large data ever crosses the IPC channel. +#[derive(Serialize, Deserialize, Debug)] +pub struct CacheGetRequest { + /// The cache key to look up. + pub key: String, + /// Where to extract output artifacts on a cache hit. + pub output_paths: Vec, +} + +/// Request to store a cache entry. +/// +/// The server reads the output artifacts from `output_paths` directly from +/// disk (client and server share the same filesystem), so no large data +/// crosses the IPC channel. +#[derive(Serialize, Deserialize, Debug)] +pub struct CachePutRequest { + /// The cache key to store under. + pub key: String, + /// Paths to the output artifacts the server should pack into the entry. + pub output_paths: Vec, + /// The compiler's stdout. + pub stdout: Vec, + /// The compiler's stderr. + pub stderr: Vec, +} + +/// Request to store a preprocessor cache entry. +#[derive(Serialize, Deserialize, Debug)] +pub struct PreprocessorCachePutRequest { + /// The preprocessor cache key. + pub key: String, + /// The preprocessor cache entry to store. + pub entry: PreprocessorCacheEntry, +} + +/// Response for a cache get request. +/// +/// On a hit the server has already extracted the artifacts to the paths +/// supplied in the request; the response carries only stdout/stderr. +#[derive(Serialize, Deserialize, Debug)] +pub enum CacheGetResponse { + /// Cache hit – artifacts extracted to the requested paths. + Hit { stdout: Vec, stderr: Vec }, + /// Cache miss – entry not found. + Miss, + /// Error occurred during cache lookup. + Error(String), +} + +/// Response for a cache put request. +#[derive(Serialize, Deserialize, Debug)] +pub enum CachePutResponse { + /// Cache entry stored successfully. + Success(std::time::Duration), + /// Error occurred during cache storage (best-effort, not fatal). + Error(String), +} + +/// Protocol capability detection helpers. 
+impl Request { + /// Check if this request requires protocol v2 features. + /// + /// Returns true for CacheGet, CachePut, and preprocessor cache requests + /// which are only available in v2 servers. + pub fn requires_v2(&self) -> bool { + matches!( + self, + Request::CacheGet(_) + | Request::CachePut(_) + | Request::PreprocessorCacheGet(_) + | Request::PreprocessorCachePut(_) + ) + } + + /// Check if this request is compatible with protocol v1. + /// + /// Returns true for legacy requests that work with both old and new servers. + pub fn is_v1_compatible(&self) -> bool { + matches!( + self, + Request::ZeroStats + | Request::GetStats + | Request::DistStatus + | Request::Shutdown + | Request::Compile(_) + ) + } +} diff --git a/src/server.rs b/src/server.rs index 755f5d43a0..a5458c5ac2 100644 --- a/src/server.rs +++ b/src/server.rs @@ -898,6 +898,22 @@ where Message::WithoutBody(Response::ShuttingDown(Box::new(info))) }) } + Request::CacheGet(req) => { + debug!("handle_client: cache_get"); + me.handle_cache_get(req).await + } + Request::CachePut(req) => { + debug!("handle_client: cache_put"); + me.handle_cache_put(req).await + } + Request::PreprocessorCacheGet(key) => { + debug!("handle_client: preprocessor_cache_get"); + me.handle_preprocessor_cache_get(key).await + } + Request::PreprocessorCachePut(req) => { + debug!("handle_client: preprocessor_cache_put"); + me.handle_preprocessor_cache_put(req).await + } } }) } @@ -995,13 +1011,22 @@ where where T: AsyncRead + AsyncWrite + Unpin + Send + 'static, { + // Use a generous default frame-length limit (512 MB) so that large cache + // entries (e.g. Rust rlib files with embedded bitcode and debug info) can be + // transferred over the IPC channel in client-side compilation mode, while + // still guarding against OOM from corrupted messages. + // SCCACHE_MAX_FRAME_LENGTH overrides this value entirely. 
+ const DEFAULT_MAX_FRAME_LENGTH: usize = 512 * 1024 * 1024; // 512 MB let mut builder = length_delimited::Builder::new(); if let Ok(max_frame_length_str) = env::var("SCCACHE_MAX_FRAME_LENGTH") { if let Ok(max_frame_length) = max_frame_length_str.parse::() { builder.max_frame_length(max_frame_length); } else { warn!("Content of SCCACHE_MAX_FRAME_LENGTH is not a valid number, using default"); + builder.max_frame_length(DEFAULT_MAX_FRAME_LENGTH); } + } else { + builder.max_frame_length(DEFAULT_MAX_FRAME_LENGTH); } let io = builder.new_framed(socket); @@ -1053,6 +1078,149 @@ where *self.stats.lock().await = ServerStats::default(); } + /// Handle a cache get request from a client. + /// + /// On a hit the server extracts the artifacts directly to the paths + /// specified by the client (both share the same filesystem), so no + /// large data has to be transferred over the IPC channel. + /// + /// # Trust assumption + /// The client-supplied `output_paths` are written to without validation. + /// This is safe because client and server are co-located on the same + /// machine and communicate over a local socket — the IPC channel is not + /// exposed to untrusted processes. + async fn handle_cache_get( + &self, + req: crate::protocol::CacheGetRequest, + ) -> Result { + use crate::cache::Cache; + use crate::protocol::{CacheGetResponse, Response}; + + match self.storage.get(&req.key).await { + Ok(Cache::Hit(mut cache_read)) => { + // Read stdout/stderr before consuming the entry. + let stdout = cache_read.get_stdout(); + let stderr = cache_read.get_stderr(); + // Extract compiled artifacts directly to disk – no IPC bulk transfer. 
+ match cache_read.extract_objects(req.output_paths, &self.rt).await { + Ok(()) => Ok(Message::WithoutBody(Response::CacheGet( + CacheGetResponse::Hit { stdout, stderr }, + ))), + Err(e) => { + warn!( + "CacheGet({}): failed to extract cache entry: {e:#}", + req.key + ); + Ok(Message::WithoutBody(Response::CacheGet( + CacheGetResponse::Error(format!( + "key={}: failed to extract cache entry: {e:#}", + req.key + )), + ))) + } + } + } + Ok(Cache::Miss) | Ok(Cache::None) | Ok(Cache::Recache) => Ok(Message::WithoutBody( + Response::CacheGet(CacheGetResponse::Miss), + )), + Err(e) => { + warn!("CacheGet({}): storage error: {e:#}", req.key); + Ok(Message::WithoutBody(Response::CacheGet( + CacheGetResponse::Error(format!("key={}: storage error: {e:#}", req.key)), + ))) + } + } + } + + /// Handle a cache put request from a client. + /// + /// The server reads the output artifacts from disk directly (shared + /// filesystem), packs them into a cache entry, and stores it. + /// + /// # Trust assumption + /// The client-supplied `output_paths` are read from without validation. + /// See [`handle_cache_get`] for rationale. 
+ async fn handle_cache_put( + &self, + req: crate::protocol::CachePutRequest, + ) -> Result { + use crate::cache::CacheWrite; + use crate::protocol::{CachePutResponse, Response}; + + let pool = self.rt.clone(); + let mut entry = match CacheWrite::from_objects(req.output_paths, &pool).await { + Ok(e) => e, + Err(e) => { + let msg = format!("key={}: failed to read output files: {e:#}", req.key); + warn!("CachePut({}): {}", req.key, msg); + return Ok(Message::WithoutBody(Response::CachePut( + CachePutResponse::Error(msg), + ))); + } + }; + let _ = entry.put_stdout(&req.stdout); + let _ = entry.put_stderr(&req.stderr); + + match self.storage.put(&req.key, entry).await { + Ok(duration) => Ok(Message::WithoutBody(Response::CachePut( + CachePutResponse::Success(duration), + ))), + Err(e) => { + let msg = format!("key={}: failed to store to backend: {e:#}", req.key); + warn!("CachePut({}): {}", req.key, msg); + Ok(Message::WithoutBody(Response::CachePut( + CachePutResponse::Error(msg), + ))) + } + } + } + + /// Handle a preprocessor cache get request. 
+ async fn handle_preprocessor_cache_get(&self, key: String) -> Result { + use crate::protocol::Response; + use std::io::Read; + + match self.storage.get_preprocessor_cache_entry(&key).await { + Ok(Some(mut reader)) => { + // Read the preprocessor cache entry + use crate::compiler::PreprocessorCacheEntry; + + // Read all bytes from the reader + let mut bytes = Vec::new(); + match reader.read_to_end(&mut bytes) { + Ok(_) => { + // Parse the preprocessor cache entry + match PreprocessorCacheEntry::read(&bytes) { + Ok(entry) => Ok(Message::WithoutBody(Response::PreprocessorCacheGet( + Some(entry), + ))), + Err(_) => { + Ok(Message::WithoutBody(Response::PreprocessorCacheGet(None))) + } + } + } + Err(_) => Ok(Message::WithoutBody(Response::PreprocessorCacheGet(None))), + } + } + Ok(None) => Ok(Message::WithoutBody(Response::PreprocessorCacheGet(None))), + Err(_) => Ok(Message::WithoutBody(Response::PreprocessorCacheGet(None))), + } + } + + /// Handle a preprocessor cache put request. + async fn handle_preprocessor_cache_put( + &self, + req: crate::protocol::PreprocessorCachePutRequest, + ) -> Result { + use crate::protocol::Response; + + let _ = self + .storage + .put_preprocessor_cache_entry(&req.key, req.entry) + .await; + Ok(Message::WithoutBody(Response::PreprocessorCachePut)) + } + /// Handle a compile request from a client. /// /// This will handle a compile request entirely, generating a response with diff --git a/src/test/tests.rs b/src/test/tests.rs index 8d283a1b2b..c2020a8253 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -24,7 +24,6 @@ use crate::test::utils::*; use fs::File; use fs_err as fs; use futures::channel::oneshot::{self, Sender}; -#[cfg(not(target_os = "macos"))] use serial_test::serial; use std::io::{Cursor, Write}; #[cfg(not(target_os = "macos"))] @@ -173,7 +172,11 @@ fn test_server_stats() { } #[test] +#[serial] fn test_server_unsupported_compiler() { + // This test exercises server-side compilation; disable client-side mode. 
+ // Use #[serial] to avoid races with parallel tests that also modify env vars. + unsafe { std::env::set_var("SCCACHE_CLIENT_SIDE_COMPILE", "0") }; let f = TestFixture::new(); let (addr, sender, server_creator, child) = run_server_thread(f.tempdir.path(), None); // Connect to the server. @@ -223,7 +226,11 @@ fn test_server_unsupported_compiler() { } #[test] +#[serial] fn test_server_compile() { + // This test exercises server-side compilation; disable client-side mode. + // Use #[serial] to avoid races with parallel tests that also modify env vars. + unsafe { std::env::set_var("SCCACHE_CLIENT_SIDE_COMPILE", "0") }; let _ = env_logger::try_init(); let f = TestFixture::new(); let gcc = f.mk_bin("gcc").unwrap();