Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions examples/image_layer_optimize/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Benchmark: Docker layer optimization cache efficiency test.
"""

import logging
import time
from typing import Dict

import flyte
from flyte import Image
from flyte._internal.imagebuild.image_builder import ImageBuildEngine

# Task environment for the benchmark task below. Its image is a Debian base
# with the three packages that both benchmark builds also install.
env = flyte.TaskEnvironment(
    name="benchmark",
    image=Image.from_debian_base(name="benchmark-base").with_pip_packages(
        "torch",
        "numpy",
        "pandas",
    ),
)


@env.task
async def benchmark_layer_optimization() -> Dict[str, float]:
    """
    Time two forced image builds: one with ``optimize_layers=False`` and one with
    ``optimize_layers=True``.

    Both images install "torch", "numpy" and "pandas" and differ only in one extra
    package ("requests" vs. "httpx").

    :return: dict with ``no_opt_time`` and ``opt_time`` (elapsed seconds for each build)
        and ``speedup`` (``no_opt_time / opt_time``, or 1.0 when ``opt_time`` is not positive).
    """
    print("Starting Docker layer optimization benchmark")

    # Phase 1: No optimization
    image_no_opt = Image.from_debian_base(name="benchmark-no-opt").with_pip_packages(
        "torch", "numpy", "pandas", "requests"
    )

    # perf_counter() is monotonic, so the interval cannot be distorted by
    # wall-clock adjustments (NTP sync, manual clock changes) during a long build.
    start = time.perf_counter()
    await ImageBuildEngine.build(image_no_opt, force=True, optimize_layers=False)
    no_opt_time = time.perf_counter() - start
    print(f"No optimization build: {no_opt_time:.1f}s")

    # Phase 2: With optimization
    image_opt = Image.from_debian_base(name="benchmark-opt").with_pip_packages("torch", "numpy", "pandas", "httpx")

    start = time.perf_counter()
    await ImageBuildEngine.build(image_opt, force=True, optimize_layers=True)
    opt_time = time.perf_counter() - start
    print(f"Optimized build: {opt_time:.1f}s")

    # Guard against division by zero if the optimized build reports no elapsed time.
    speedup = no_opt_time / opt_time if opt_time > 0 else 1.0

    print(f"no-opt={no_opt_time:.1f}s | opt={opt_time:.1f}s | speedup={speedup:.1f}x")

    return {
        "no_opt_time": no_opt_time,
        "opt_time": opt_time,
        "speedup": speedup,
    }


if __name__ == "__main__":
    # Initialise flyte from config with debug logging, then submit the
    # benchmark task in "remote" mode and print the run's name and URL.
    flyte.init_from_config(log_level=logging.DEBUG)
    remote_ctx = flyte.with_runcontext(mode="remote", log_level=logging.DEBUG)
    run = remote_ctx.run(benchmark_layer_optimization)
    print(run.name)
    print(run.url)
117 changes: 117 additions & 0 deletions examples/image_layer_optimize/heavybenchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
Benchmark: Docker layer optimization cache efficiency test.

This benchmark uses HEAVY dependencies (torch, tensorflow, transformers) to demonstrate
significant time savings from layer optimization.

Expected results:
- Without optimization: ~5-8 minutes (reinstalls ALL heavy packages)
- With optimization: ~10-30 seconds (reuses heavy layer cache)
- Speedup: 10-30x faster
"""

import logging
import time
from typing import Dict

import flyte
from flyte import Image
from flyte._internal.imagebuild.image_builder import ImageBuildEngine

# ----------------------------------------------------------------------------
# Task environment for the benchmark task below. Its image installs the heavy
# ML packages that both benchmark builds also install.
# ----------------------------------------------------------------------------
env = flyte.TaskEnvironment(
    name="benchmark",
    image=Image.from_debian_base(name="benchmark-base").with_pip_packages(
        "torch",  # ~800MB
        "tensorflow",  # ~500MB
        "transformers",  # large w/ deps
        "numpy",
        "pandas",
    ),
)


@env.task
async def benchmark_layer_optimization() -> Dict[str, float]:
    """
    Benchmark layer optimization with heavy ML dependencies.

    Phase 1: Add a small package ("requests") WITHOUT optimization
    Phase 2: Add a small package ("httpx") WITH optimization

    Both phases run a forced build of an image that installs "torch", "tensorflow",
    "transformers", "numpy" and "pandas" plus the one extra package.

    :return: dict with the elapsed time of each build in seconds and minutes
        (``no_opt_time_*``, ``opt_time_*``), the ``speedup`` ratio
        (``no_opt_time / opt_time``, or 1.0 when ``opt_time`` is not positive) and the
        difference between the two builds (``time_saved_*``).
    """
    bar = "=" * 72
    print(bar)
    print("Docker Layer Optimization Benchmark (heavy deps)")
    print(bar)

    # ------------------------------------------------------------------------
    # Phase 1: WITHOUT optimization
    # ------------------------------------------------------------------------
    print("\n[1/2] WITHOUT optimization: add 'requests' (expect full rebuild)")
    image_no_opt = Image.from_debian_base(name="benchmark-no-opt").with_pip_packages(
        "torch",
        "tensorflow",
        "transformers",
        "numpy",
        "pandas",
        "requests",
    )

    # perf_counter() is monotonic; builds here can run for minutes, long enough
    # for a wall-clock adjustment to corrupt a time.time()-based measurement.
    start = time.perf_counter()
    await ImageBuildEngine.build(image_no_opt, force=True, optimize_layers=False)
    no_opt_time = time.perf_counter() - start
    print(f" done: {no_opt_time:.1f}s ({no_opt_time / 60:.1f} min)")

    # ------------------------------------------------------------------------
    # Phase 2: WITH optimization
    # ------------------------------------------------------------------------
    print("\n[2/2] WITH optimization: add 'httpx' (expect cache hit on heavy layer)")
    image_opt = Image.from_debian_base(name="benchmark-opt").with_pip_packages(
        "torch",
        "tensorflow",
        "transformers",
        "numpy",
        "pandas",
        "httpx",
    )

    start = time.perf_counter()
    await ImageBuildEngine.build(image_opt, force=True, optimize_layers=True)
    opt_time = time.perf_counter() - start
    print(f" done: {opt_time:.1f}s ({opt_time / 60:.1f} min)")

    # ------------------------------------------------------------------------
    # Results
    # ------------------------------------------------------------------------
    # Guard against division by zero if the optimized build reports no elapsed time.
    speedup = no_opt_time / opt_time if opt_time > 0 else 1.0
    time_saved = no_opt_time - opt_time

    print("\n" + bar)
    print("RESULTS")
    print(bar)
    print(f"no-opt: {no_opt_time:7.1f}s ({no_opt_time / 60:5.1f} min) full rebuild")
    print(f"opt: {opt_time:7.1f}s ({opt_time / 60:5.1f} min) cache reuse")
    print(f"speedup: {speedup:7.1f}x")
    print(f"saved: {time_saved:7.1f}s ({time_saved / 60:5.1f} min)")
    print(bar)

    return {
        "no_opt_time_seconds": no_opt_time,
        "no_opt_time_minutes": no_opt_time / 60,
        "opt_time_seconds": opt_time,
        "opt_time_minutes": opt_time / 60,
        "speedup": speedup,
        "time_saved_seconds": time_saved,
        "time_saved_minutes": time_saved / 60,
    }


if __name__ == "__main__":
    # Initialise flyte from config with debug logging, then submit the
    # benchmark task in "remote" mode and print the run's name and URL.
    flyte.init_from_config(log_level=logging.DEBUG)
    remote_ctx = flyte.with_runcontext(mode="remote", log_level=logging.DEBUG)
    run = remote_ctx.run(benchmark_layer_optimization)
    print(run.name)
    print(run.url)
75 changes: 75 additions & 0 deletions examples/image_layer_optimize/quickbenchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
Quick benchmark using scikit-learn instead of torch for faster results.

This benchmark uses a pre-built base image to avoid warming the cache inside the benchmark function.
"""

import asyncio
import logging
import time
from typing import Dict

import flyte
from flyte import Image
from flyte._internal.imagebuild.image_builder import ImageBuildEngine

# ----------------------------------------------------------------------------
# Task environment for quick_benchmark(). Its image installs the same
# scikit-learn / pandas packages that the images built inside the task
# also install.
# ----------------------------------------------------------------------------
benchmark_env = flyte.TaskEnvironment(
    name="benchmark",
    image=Image.from_debian_base(name="benchmark-base").with_pip_packages(
        "scikit-learn",
        "pandas",
    ),
)


@benchmark_env.task
async def quick_benchmark() -> Dict[str, float]:
    """
    Quick benchmark using scikit-learn instead of torch for faster results.

    This assumes the base image is already built (cache is warm).

    Runs two forced builds of an image installing "scikit-learn" and "pandas" plus one
    extra package: "requests" with ``optimize_layers=False``, then "httpx" with
    ``optimize_layers=True``.

    :return: dict with ``no_opt_time`` and ``opt_time`` (elapsed seconds for each build)
        and ``speedup`` (``no_opt_time / opt_time``, or 1.0 when ``opt_time`` is not positive).
    """
    print("🔥 Quick Benchmark: Layer Optimization")

    # Phase 1: No optimization (rebuild all)
    print("\n[1/2] Adding 'requests' WITHOUT optimization...")
    no_opt = Image.from_debian_base(name="quick-no-opt").with_pip_packages("scikit-learn", "pandas", "requests")

    # perf_counter() is monotonic, so the interval cannot be distorted by
    # wall-clock adjustments while the build is running.
    start = time.perf_counter()
    await ImageBuildEngine.build(no_opt, force=True, optimize_layers=False)
    no_opt_time = time.perf_counter() - start
    print(f" ✓ Done in {no_opt_time:.1f}s")

    # Short pause between the two builds; it is outside both timed intervals.
    await asyncio.sleep(1)

    # Phase 2: With optimization (cache hit on scikit-learn)
    print("\n[2/2] Adding 'httpx' WITH optimization...")
    opt = Image.from_debian_base(name="quick-opt").with_pip_packages("scikit-learn", "pandas", "httpx")

    start = time.perf_counter()
    await ImageBuildEngine.build(opt, force=True, optimize_layers=True)
    opt_time = time.perf_counter() - start
    print(f" ✓ Done in {opt_time:.1f}s")

    # Results
    # Guard against division by zero if the optimized build reports no elapsed time.
    speedup = no_opt_time / opt_time if opt_time > 0 else 1.0

    print("\n" + "=" * 60)
    print(f"Without optimization: {no_opt_time:5.1f}s")
    print(f"With optimization: {opt_time:5.1f}s")
    print(f"Speedup: {speedup:5.1f}x")
    print("=" * 60)

    return {
        "no_opt_time": no_opt_time,
        "opt_time": opt_time,
        "speedup": speedup,
    }


if __name__ == "__main__":
    # Initialise flyte from config with debug logging, then submit the
    # quick benchmark task in "remote" mode and print the run's name and URL.
    flyte.init_from_config(log_level=logging.DEBUG)
    remote_ctx = flyte.with_runcontext(mode="remote", log_level=logging.DEBUG)
    run = remote_ctx.run(quick_benchmark)
    print(run.name)
    print(run.url)
2 changes: 1 addition & 1 deletion src/flyte/_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ def from_uv_script(
Args:
secret_mounts:
"""

ll = UVScript(
script=Path(script),
index_url=index_url,
Expand Down Expand Up @@ -847,7 +848,6 @@ def my_task(x: int) -> int:
:param extra_index_urls: extra index urls to use for pip install, default is None
:param pre: whether to allow pre-release versions, default is False
:param extra_args: extra arguments to pass to pip install, default is None
:param extra_args: extra arguments to pass to pip install, default is None
:param secret_mounts: list of secret to mount for the build process.
:return: Image
"""
Expand Down
13 changes: 13 additions & 0 deletions src/flyte/_internal/imagebuild/heavy_deps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Configuration for Docker image layer optimization.
"""

# Package names that this module classifies as "heavy" dependencies.
# Kept as a frozenset so the collection is immutable and hashable.
HEAVY_DEPENDENCIES = frozenset(
    (
        "scikit-learn",
        "tensorflow",
        "torch",
        "torchaudio",
        "torchvision",
    )
)
Loading