Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ COPY . .
RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
CGO_ENABLED=1 go build -trimpath -ldflags='-s -w' -o /out/triplet ./cmd/triplet \
&& CGO_ENABLED=0 go build -trimpath -ldflags='-s -w' -o /out/triplet-healthcheck ./cmd/triplet-healthcheck
&& CGO_ENABLED=0 go build -trimpath -ldflags='-s -w' -o /out/triplet-healthcheck ./cmd/triplet-healthcheck \
&& CGO_ENABLED=0 go build -trimpath -ldflags='-s -w' -o /out/triplet-cache-cleanup ./cmd/triplet-cache-cleanup

FROM base AS test-runner
WORKDIR /app
Expand Down Expand Up @@ -223,6 +224,7 @@ COPY --chown=triplet:triplet deploy/compose/images/ /var/lib/triplet/testdata/im

COPY --from=build /out/triplet /usr/local/bin/triplet
COPY --from=build /out/triplet-healthcheck /usr/local/bin/triplet-healthcheck
COPY --from=build /out/triplet-cache-cleanup /usr/local/bin/triplet-cache-cleanup
COPY config.example.yaml /etc/triplet/config.yaml
RUN ldd /usr/local/bin/triplet >/dev/null

Expand Down
110 changes: 110 additions & 0 deletions cmd/triplet-cache-cleanup/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Command triplet-cache-cleanup performs explicit filesystem cache cleanup.
package main

import (
"context"
"flag"
"fmt"
"io"
"os"

"github.com/libops/triplet/internal/cache"
"github.com/libops/triplet/internal/config"
)

// namedReport pairs the cleanup result for one cache with the labels used
// when that result is printed.
type namedReport struct {
// name is the short cache label ("derivative" or "source") that prefixes
// each report line.
name string
// maxConfig is the configuration key holding this cache's size target
// (for example "cache.max_bytes"); it is quoted in the over-limit warning.
maxConfig string
// report is the value returned by the store's Cleanup call.
report cache.CleanupReport
}

// main is the process entry point. All work happens in run so that run's
// deferred context cancellation executes before the process terminates:
// os.Exit ends the program immediately and does not run deferred calls.
func main() {
	os.Exit(run())
}

// run parses the command-line flags, loads the configuration, cleans every
// configured filesystem cache, and prints one report line per cache to
// standard output.
//
// The returned value is the process exit status:
//
//	0  cleanup finished and no cache is over its size target, or no cache
//	   roots are configured
//	1  cleanup failed, or at least one cache remains over its size target
//	2  the configuration file could not be loaded
func run() int {
	configPath := flag.String("config", "config.yaml", "path to the YAML config file")
	timeout := flag.Duration("timeout", 0, "optional cleanup timeout")
	flag.Parse()

	cfg, err := config.Load(*configPath)
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "config: %v\n", err)
		return 2
	}

	// A timeout of zero (the default) or less leaves the context unbounded.
	ctx := context.Background()
	if *timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, *timeout)
		defer cancel()
	}

	reports, err := cleanupCaches(ctx, cfg)
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "cache cleanup: %v\n", err)
		return 1
	}
	if len(reports) == 0 {
		_, _ = fmt.Fprintln(os.Stdout, "no filesystem cache roots configured")
		return 0
	}

	// Print every report before deciding the exit status so that one
	// oversized cache does not hide the results for the others.
	overMax := false
	for _, r := range reports {
		printReport(os.Stdout, r)
		if r.report.OverMaxBytes {
			overMax = true
			_, _ = fmt.Fprintf(os.Stderr, "%s cache remains over %s: bytes=%d max_bytes=%d\n", r.name, r.maxConfig, r.report.Bytes, r.report.MaxBytes)
		}
	}
	if overMax {
		return 1
	}
	return 0
}

// cleanupCaches runs Cleanup on each filesystem cache whose root is set in
// cfg and returns one namedReport per cleaned cache: the derivative cache
// first, then the source cache. A cache with an empty root is skipped.
//
// The first failure to open or clean a store stops the run; the error is
// returned wrapped with the cache label and no reports are returned.
func cleanupCaches(ctx context.Context, cfg *config.Config) ([]namedReport, error) {
	type target struct {
		name      string
		maxConfig string
		enabled   bool
		cleanup   func() (cache.CleanupReport, error)
	}

	// Stores are opened inside the closures so that an unconfigured cache is
	// never constructed.
	targets := [...]target{
		{
			name:      "derivative",
			maxConfig: "cache.max_bytes",
			enabled:   cfg.Cache.Root != "",
			cleanup: func() (cache.CleanupReport, error) {
				store, err := cache.NewPayloadFileStoreWithMaxAge(cfg.Cache.Root, int64(cfg.Cache.MaxBytes), cfg.Cache.MaxAge)
				if err != nil {
					return cache.CleanupReport{}, err
				}
				return store.Cleanup(ctx)
			},
		},
		{
			name:      "source",
			maxConfig: "cache.source_max_bytes",
			enabled:   cfg.Cache.SourceRoot != "",
			cleanup: func() (cache.CleanupReport, error) {
				store, err := cache.NewFileStore(cfg.Cache.SourceRoot, int64(cfg.Cache.SourceMaxBytes))
				if err != nil {
					return cache.CleanupReport{}, err
				}
				return store.Cleanup(ctx)
			},
		},
	}

	var reports []namedReport
	for _, tgt := range targets {
		if !tgt.enabled {
			continue
		}
		result, err := tgt.cleanup()
		if err != nil {
			return nil, fmt.Errorf("%s cache: %w", tgt.name, err)
		}
		reports = append(reports, namedReport{
			name:      tgt.name,
			maxConfig: tgt.maxConfig,
			report:    result,
		})
	}
	return reports, nil
}

// printReport writes a single key=value summary line for r to out. Write
// errors are deliberately ignored: the line is informational output only.
func printReport(out io.Writer, r namedReport) {
	const layout = "%s cache root=%s scanned=%d removed=%d expired_removed=%d removed_bytes=%d bytes=%d max_bytes=%d over_max=%t\n"

	rep := r.report
	_, _ = fmt.Fprintf(out, layout,
		r.name,
		rep.Root,
		rep.Scanned,
		rep.Removed,
		rep.ExpiredRemoved,
		rep.RemovedBytes,
		rep.Bytes,
		rep.MaxBytes,
		rep.OverMaxBytes,
	)
}
117 changes: 117 additions & 0 deletions cmd/triplet-cache-cleanup/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package main

import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"time"

"github.com/libops/triplet/internal/cache"
"github.com/libops/triplet/internal/config"
)

// TestCleanupCachesRemovesExpiredDerivativeAndReportsOversize seeds a
// derivative cache with one back-dated entry and one fresh entry, and a source
// cache holding more bytes than its configured target. It then checks that
// cleanupCaches removes only the back-dated derivative and reports the source
// cache as over its size target.
func TestCleanupCachesRemovesExpiredDerivativeAndReportsOversize(t *testing.T) {
	ctx := context.Background()
	derivRoot := t.TempDir()
	sourceRoot := t.TempDir()

	// Derivative cache: write "old", back-date its payload file past the
	// one-hour age limit, then write "new".
	derivStore, err := cache.NewPayloadFileStoreWithMaxAge(derivRoot, 0, time.Hour)
	if err != nil {
		t.Fatal(err)
	}
	err = derivStore.Put(ctx, "old", "image/jpeg", strings.NewReader("old"))
	if err != nil {
		t.Fatal(err)
	}
	seeded := payloadFiles(t, derivRoot)
	if len(seeded) != 1 {
		t.Fatalf("payload files after old put = %d, want 1", len(seeded))
	}
	stale := time.Now().Add(-2 * time.Hour)
	err = os.Chtimes(seeded[0], stale, stale)
	if err != nil {
		t.Fatal(err)
	}
	err = derivStore.Put(ctx, "new", "image/jpeg", strings.NewReader("new"))
	if err != nil {
		t.Fatal(err)
	}

	// Source cache: a single entry, larger than the 1-byte target set below.
	sourceStore, err := cache.NewFileStore(sourceRoot, 0)
	if err != nil {
		t.Fatal(err)
	}
	err = sourceStore.Put(ctx, "source", "image/tiff", strings.NewReader("source"))
	if err != nil {
		t.Fatal(err)
	}

	cfg := &config.Config{
		Cache: config.Cache{
			Root:           derivRoot,
			MaxAge:         time.Hour,
			SourceRoot:     sourceRoot,
			SourceMaxBytes: 1,
		},
	}
	reports, err := cleanupCaches(ctx, cfg)
	if err != nil {
		t.Fatal(err)
	}

	deriv := reportByName(t, reports, "derivative")
	if deriv.ExpiredRemoved != 1 {
		t.Fatalf("expired removed = %d, want 1", deriv.ExpiredRemoved)
	}
	if deriv.Removed != 1 {
		t.Fatalf("derivative removed = %d, want 1", deriv.Removed)
	}
	if remaining := len(payloadFiles(t, derivRoot)); remaining != 1 {
		t.Fatalf("derivative payload files = %d, want 1", remaining)
	}

	source := reportByName(t, reports, "source")
	if !source.OverMaxBytes {
		t.Fatal("expected source cache to report over max bytes")
	}
	if source.Bytes != int64(len("source")) {
		t.Fatalf("source bytes = %d, want %d", source.Bytes, len("source"))
	}
}

func TestCleanupCachesSkipsUnconfiguredRoots(t *testing.T) {
reports, err := cleanupCaches(context.Background(), &config.Config{})
if err != nil {
t.Fatal(err)
}
if len(reports) != 0 {
t.Fatalf("reports = %d, want 0", len(reports))
}
}

// reportByName returns the cleanup report of the first entry in reports whose
// name equals name. The test fails immediately when no entry matches.
func reportByName(t *testing.T, reports []namedReport, name string) cache.CleanupReport {
	t.Helper()
	for i := range reports {
		if reports[i].name != name {
			continue
		}
		return reports[i].report
	}
	t.Fatalf("missing %s report", name)
	// Not reached after Fatalf; present only because the compiler requires a
	// terminating return.
	return cache.CleanupReport{}
}

// payloadFiles walks root and returns the path of every regular entry that is
// neither a ".meta" file nor a file whose name starts with ".tmp-".
// Directories are descended into but not listed. A root that does not exist
// produces an empty result; any other walk error fails the test.
func payloadFiles(t *testing.T, root string) []string {
	t.Helper()

	var found []string
	visit := func(path string, entry os.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case entry.IsDir():
			return nil
		case filepath.Ext(path) == ".meta":
			return nil
		case strings.HasPrefix(entry.Name(), ".tmp-"):
			return nil
		}
		found = append(found, path)
		return nil
	}

	if err := filepath.WalkDir(root, visit); err != nil && !errors.Is(err, os.ErrNotExist) {
		t.Fatal(err)
	}
	return found
}
15 changes: 8 additions & 7 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,18 +185,19 @@ cache:
root: /var/lib/triplet/cache
# Best-effort aggregate size target for all cached derivative payload files
# under cache.root. This controls retained cache footprint over time, not the
# size of any single generated response. A write may temporarily exceed this
# target before eviction runs, and metadata sidecar files are not counted.
# 0 disables size-based eviction.
# size of any single generated response. The server does not prune for size
# in the request path; run triplet-cache-cleanup periodically to report when
# this target is exceeded. Metadata sidecar files are not counted. 0 disables
# size reporting.
max_bytes: 500GiB
# Optional age limit for derivative entries. Expired entries are removed on
# read and opportunistically during writes. 0 disables age-based eviction.
# Optional age limit for derivative entries. Expired entries are treated as
# cache misses on read and are removed by triplet-cache-cleanup. 0 disables
# age-based cleanup.
max_age: 720h
# Optional filesystem source cache for fetched source bytes (primarily HTTP
# identifiers).
# source_root: /var/lib/triplet/source-cache
# Best-effort eviction target for the source cache. 0 disables size-based
# eviction.
# Best-effort reporting target for the source cache. triplet-cache-cleanup
# reports when this target is exceeded. 0 disables size reporting.
source_max_bytes: 1GiB
# When non-zero, stale source-cache hits are served immediately while a
# background refresh fetches a fresh copy for later requests.
Expand Down
35 changes: 23 additions & 12 deletions docs/caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,35 @@ cache:
max_age: 720h
```

`max_bytes` is a best-effort filesystem eviction target. `max_age` is an
optional age limit for derivative entries. Failed transforms and HTTP error
responses are not stored.
`max_bytes` is a best-effort filesystem size target. `max_age` is an optional
age limit for derivative entries. Failed transforms and HTTP error responses
are not stored.

`cache.max_bytes` is the approximate total retained size of derivative payload
files under `cache.root`. It is different from
`iiif.image.max_derivative_bytes`, which limits one generated response before it
can be returned or cached. A cache write can temporarily exceed `cache.max_bytes`
before eviction runs. When size eviction runs, Triplet removes the oldest
derivative payload files first based on payload file modification time; reads
do not refresh cache age.
can be returned or cached. Cache writes do not walk or prune the cache tree in
the server request path.

`cache.max_age` is based on the derivative payload file modification time, not
when it was last requested. When a cached derivative is older than `max_age`,
Triplet removes it and treats the request as a cache miss. Expired entries are
also removed opportunistically when new entries are written. Set `max_age: 0`
or omit it to keep derivative files until size eviction, manual deletion,
invalidation, or cache-key changes make them unused.
Triplet treats the request as a cache miss. Set `max_age: 0` or omit it to keep
derivative files until manual deletion, invalidation, or cache-key changes make
them unused.

Run `triplet-cache-cleanup` periodically from cron, systemd timers, Kubernetes
CronJobs, or a similar scheduler:

```sh
triplet-cache-cleanup -config /etc/triplet/config.yaml
```

The cleanup command reads the same YAML configuration as the server. It removes
derivative cache entries older than `cache.max_age`, then measures the remaining
derivative cache size. It also measures the source cache when `cache.source_root`
is configured. If a cache remains above `cache.max_bytes` or
`cache.source_max_bytes`, the command reports that condition and exits non-zero;
it does not delete live entries solely to satisfy a size target.

### Derivative invalidation

Expand Down Expand Up @@ -153,7 +164,7 @@ derivative and source caches.

| Layer | Configuration | What is cached | Invalidation / freshness |
|---|---|---|---|
| Derivative cache | `cache.root`; optional `cache.max_bytes`, `cache.max_age`, `iiif.image.cache_invalidation_token` | Encoded IIIF image responses, keyed by identifier, source version, invalidation marker, region, size, rotation, quality, and format. | A changed source version produces a new key. The protected invalidation route bumps the per-identifier invalidation marker. `cache.max_bytes` is a best-effort aggregate cache budget; `cache.max_age` removes derivative entries older than the configured duration. `iiif.image.max_derivative_bytes` is the per-response size limit before return/cache. Failed transforms and HTTP error responses are not stored. |
| Derivative cache | `cache.root`; optional `cache.max_bytes`, `cache.max_age`, `iiif.image.cache_invalidation_token` | Encoded IIIF image responses, keyed by identifier, source version, invalidation marker, region, size, rotation, quality, and format. | A changed source version produces a new key. The protected invalidation route bumps the per-identifier invalidation marker. `cache.max_bytes` is a best-effort aggregate cache budget reported by `triplet-cache-cleanup`; `cache.max_age` is enforced on reads and by `triplet-cache-cleanup`. `iiif.image.max_derivative_bytes` is the per-response size limit before return/cache. Failed transforms and HTTP error responses are not stored. |
| HTTP source cache | `cache.source_root`; optional `cache.source_max_bytes`, `cache.source_stale_after` | Original source bytes fetched through the HTTP source backend. | Keys are source identifiers. When `source_stale_after` is set, stale hits are served immediately and refreshed in the background. Upstream 4xx/5xx responses are not stored. |
| HTTP metadata cache | `sources.http.metadata_cache_ttl` | Successful remote source metadata lookups for URL identifiers. | In-memory only. While fresh, derivative cache checks can avoid upstream metadata requests. This can serve stale derivatives until the TTL expires. |
| `info.json` dimension cache | `iiif.image.info_dimension_cache` | Source dimensions used to build Image API `info.json`. | In-memory only. Entries are keyed by identifier plus source size/modtime metadata, so source changes with updated metadata miss the cache. |
Expand Down
Loading
Loading