From 9f6ad3502ca4717d96c448d658b5dd8840981167 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 11:06:00 +0000 Subject: [PATCH 1/9] Use storedAt for cache eviction --- docs/caching.md | 12 +++- internal/cache/file.go | 12 ++-- internal/cache/file_test.go | 123 ++++++++++++++++++++++++++++++++---- 3 files changed, 126 insertions(+), 21 deletions(-) diff --git a/docs/caching.md b/docs/caching.md index 6766059..76e1013 100644 --- a/docs/caching.md +++ b/docs/caching.md @@ -28,7 +28,8 @@ files under `cache.root`. It is different from `iiif.image.max_derivative_bytes`, which limits one generated response before it can be returned or cached. A cache write can temporarily exceed `cache.max_bytes` before eviction runs, and metadata sidecar files are not counted toward the -target. +target. When size eviction runs, Triplet removes the oldest-written derivative +entries first; reads do not refresh cache age. `cache.max_age` is based on when Triplet wrote the derivative entry, not when it was last requested. When a cached derivative is older than `max_age`, Triplet @@ -51,6 +52,15 @@ The invalidation route is protected by a bearer token and optional caller CIDR checks. See [Authorization](authorization.md#cache-invalidation-route) for the route configuration and caller requirements. +```sh +curl -X POST \ + -H "Authorization: Bearer ${TRIPLET_IMAGE_CACHE_INVALIDATION_TOKEN}" \ + "https://iiif.example.edu/iiif/3/https%3A%2F%2Frepo.example.edu%2Fsystem%2Ffiles%2Fimage.tif/cache/invalidate" +``` + +The identifier in the request path must be URI-encoded exactly as it appears in +the IIIF Image API request path segment. + ### Source version metadata Derivative cache keys include a source version so Triplet does not reuse old diff --git a/internal/cache/file.go b/internal/cache/file.go index fa655cc..6dd2d16 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -23,7 +23,7 @@ type FileStore struct { Root string // MaxBytes optionally bounds total cache size; when exceeded, the - // least-recently-modified entries are evicted on the next Put. + // oldest-written entries are evicted on the next Put. MaxBytes int64 // MaxAge optionally bounds how long entries remain usable after Put. @@ -87,9 +87,6 @@ func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, er } return nil, Entry{}, err } - // Refresh mtime so eviction LRU treats this as recently used. - now := time.Now() - _ = os.Chtimes(dataPath, now, now) return f, Entry(m), nil } @@ -157,7 +154,6 @@ func (s *FileStore) evict() { path string metaPath string size int64 - mod time.Time storedAt time.Time } var entries []entry @@ -186,7 +182,7 @@ func (s *FileStore) evict() { _ = os.Remove(metaPath) return nil } - entries = append(entries, entry{path: p, metaPath: metaPath, size: info.Size(), mod: info.ModTime(), storedAt: storedAt}) + entries = append(entries, entry{path: p, metaPath: metaPath, size: info.Size(), storedAt: storedAt}) total += info.Size() return nil }) @@ -194,10 +190,10 @@ func (s *FileStore) evict() { return } sort.Slice(entries, func(i, j int) bool { - if entries[i].mod.Equal(entries[j].mod) { + if entries[i].storedAt.Equal(entries[j].storedAt) { return entries[i].path < entries[j].path } - return entries[i].mod.Before(entries[j].mod) + return entries[i].storedAt.Before(entries[j].storedAt) }) for _, e := range entries { if total <= s.MaxBytes { diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go index b37e008..ed00900 100644 --- a/internal/cache/file_test.go +++ b/internal/cache/file_test.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" "testing" + "testing/synctest" "time" ) @@ -106,28 +107,126 @@ func TestFileStoreEvictsWhenOversize(t *testing.T) { t.Fatal("expected oldest entry to be evicted") } -func TestFileStoreMaxAgeExpiresOnGet(t *testing.T) { - store, err := NewFileStoreWithMaxAge(t.TempDir(), 0, time.Nanosecond) +func TestFileStoreGetDoesNotRefreshEvictionOrder(t *testing.T) { + store, err := NewFileStore(t.TempDir(), 5) if err != nil { t.Fatal(err) } - if err := store.Put(context.Background(), "old", "text/plain", strings.NewReader("payload")); err != nil { + + if err := store.Put(context.Background(), "a", "text/plain", bytes.NewReader([]byte("1234"))); err != nil { + t.Fatal(err) + } + time.Sleep(20 * time.Millisecond) + if err := store.Put(context.Background(), "b", "text/plain", bytes.NewReader([]byte("5678"))); err != nil { t.Fatal(err) } - time.Sleep(time.Millisecond) - _, _, err = store.Get(context.Background(), "old") - if !errors.Is(err, ErrMiss) { - t.Fatalf("err = %v, want miss", err) + rc, _, err := store.Get(context.Background(), "a") + if err != nil { + t.Fatal(err) } + _ = rc.Close() - dataPath, metaPath := store.paths("old") - if _, err := os.Stat(dataPath); !errors.Is(err, os.ErrNotExist) { - t.Fatalf("data stat err = %v, want not exist", err) + time.Sleep(20 * time.Millisecond) + if err := store.Put(context.Background(), "c", "text/plain", bytes.NewReader([]byte("90"))); err != nil { + t.Fatal(err) + } + + if _, _, err := store.Get(context.Background(), "a"); !errors.Is(err, ErrMiss) { + t.Fatalf("a err = %v, want miss", err) + } + if rc, _, err := store.Get(context.Background(), "b"); err != nil { + t.Fatalf("b err = %v", err) + } else { + _ = rc.Close() + } + if rc, _, err := store.Get(context.Background(), "c"); err != nil { + t.Fatalf("c err = %v", err) + } else { + _ = rc.Close() + } +} + +func TestFileStoreEvictsByStoredAt(t *testing.T) { + store, err := NewFileStore(t.TempDir(), 5) + if err != nil { + t.Fatal(err) + } + + if err := store.Put(context.Background(), "a", "text/plain", bytes.NewReader([]byte("1234"))); err != nil { + t.Fatal(err) + } + if err := store.Put(context.Background(), "b", "text/plain", bytes.NewReader([]byte("5678"))); err != nil { + t.Fatal(err) + } + + dataPathA, metaPathA := store.paths("a") + oldTime := time.Now().Add(-2 * time.Hour) + if err := os.Chtimes(dataPathA, time.Now(), time.Now()); err != nil { + t.Fatal(err) + } + metaA := fileMeta{ContentType: "text/plain", Size: 4, StoredAt: oldTime} + mbA, _ := json.Marshal(metaA) + if err := os.WriteFile(metaPathA, mbA, 0o640); err != nil { + t.Fatal(err) + } + + dataPathB, metaPathB := store.paths("b") + newTime := time.Now().Add(-1 * time.Hour) + if err := os.Chtimes(dataPathB, oldTime, oldTime); err != nil { + t.Fatal(err) + } + metaB := fileMeta{ContentType: "text/plain", Size: 4, StoredAt: newTime} + mbB, _ := json.Marshal(metaB) + if err := os.WriteFile(metaPathB, mbB, 0o640); err != nil { + t.Fatal(err) + } + + if err := store.Put(context.Background(), "c", "text/plain", bytes.NewReader([]byte("90"))); err != nil { + t.Fatal(err) + } + + if _, _, err := store.Get(context.Background(), "a"); !errors.Is(err, ErrMiss) { + t.Fatalf("a err = %v, want miss", err) } - if _, err := os.Stat(metaPath); !errors.Is(err, os.ErrNotExist) { - t.Fatalf("meta stat err = %v, want not exist", err) + if rc, _, err := store.Get(context.Background(), "b"); err != nil { + t.Fatalf("b err = %v", err) + } else { + _ = rc.Close() } + if rc, _, err := store.Get(context.Background(), "c"); err != nil { + t.Fatalf("c err = %v", err) + } else { + _ = rc.Close() + } +} + +func TestFileStoreMaxAgeExpiresOnGet(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + store, err := NewFileStoreWithMaxAge(t.TempDir(), 0, time.Nanosecond) + if err != nil { + t.Fatal(err) + } + if err := store.Put(context.Background(), "old", "text/plain", strings.NewReader("payload")); err != nil { + t.Fatal(err) + } + + time.Sleep(time.Millisecond) + synctest.Wait() + + _, _, err = store.Get(context.Background(), "old") + if !errors.Is(err, ErrMiss) { + t.Fatalf("err = %v, want miss", err) + } + + dataPath, metaPath := store.paths("old") + if _, err := os.Stat(dataPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("data stat err = %v, want not exist", err) + } + if _, err := os.Stat(metaPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("meta stat err = %v, want not exist", err) + } + }) } func TestFileStoreMaxAgeEvictsOnPut(t *testing.T) { From 7e130dcebe2b6698329ec9c2bcb8fcbfd9b449de Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 07:27:59 -0400 Subject: [PATCH 2/9] Simplify fileMeta --- docs/caching.md | 19 ++++++------- internal/cache/file.go | 54 +++++++++++++++++++++---------------- internal/cache/file_test.go | 49 +++++++++++---------------------- 3 files changed, 57 insertions(+), 65 deletions(-) diff --git a/docs/caching.md b/docs/caching.md index 76e1013..57ac18c 100644 --- a/docs/caching.md +++ b/docs/caching.md @@ -28,15 +28,16 @@ files under `cache.root`. It is different from `iiif.image.max_derivative_bytes`, which limits one generated response before it can be returned or cached. A cache write can temporarily exceed `cache.max_bytes` before eviction runs, and metadata sidecar files are not counted toward the -target. When size eviction runs, Triplet removes the oldest-written derivative -entries first; reads do not refresh cache age. - -`cache.max_age` is based on when Triplet wrote the derivative entry, not when it -was last requested. When a cached derivative is older than `max_age`, Triplet -removes it and treats the request as a cache miss. Expired entries are also -removed opportunistically when new entries are written. Set `max_age: 0` or omit -it to keep derivative files until size eviction, manual deletion, invalidation, -or cache-key changes make them unused. +target. When size eviction runs, Triplet removes the oldest derivative payload +files first based on payload file modification time; reads do not refresh cache +age. + +`cache.max_age` is based on the derivative payload file modification time, not +when it was last requested. When a cached derivative is older than `max_age`, +Triplet removes it and treats the request as a cache miss. Expired entries are +also removed opportunistically when new entries are written. Set `max_age: 0` +or omit it to keep derivative files until size eviction, manual deletion, +invalidation, or cache-key changes make them unused. ### Derivative invalidation diff --git a/internal/cache/file.go b/internal/cache/file.go index 6dd2d16..b08f12a 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -23,7 +23,7 @@ type FileStore struct { Root string // MaxBytes optionally bounds total cache size; when exceeded, the - // oldest-written entries are evicted on the next Put. + // oldest payload files are evicted on the next Put based on mtime. MaxBytes int64 // MaxAge optionally bounds how long entries remain usable after Put. @@ -56,9 +56,7 @@ func NewFileStoreWithMaxAge(root string, maxBytes int64, maxAge time.Duration) ( } type fileMeta struct { - ContentType string `json:"content_type"` - Size int64 `json:"size"` - StoredAt time.Time `json:"stored_at"` + ContentType string `json:"content_type"` } // Get implements Store. @@ -75,11 +73,6 @@ func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, er if err := json.Unmarshal(mb, &m); err != nil { return nil, Entry{}, fmt.Errorf("cache meta: %w", err) } - if s.expired(m.StoredAt, time.Now()) { - _ = os.Remove(dataPath) - _ = os.Remove(metaPath) - return nil, Entry{}, ErrMiss - } f, err := os.Open(dataPath) if err != nil { if errors.Is(err, fs.ErrNotExist) { @@ -87,7 +80,23 @@ func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, er } return nil, Entry{}, err } - return f, Entry(m), nil + info, err := f.Stat() + if err != nil { + _ = f.Close() + return nil, Entry{}, err + } + storedAt := info.ModTime() + if s.expired(storedAt, time.Now()) { + _ = f.Close() + _ = os.Remove(dataPath) + _ = os.Remove(metaPath) + return nil, Entry{}, ErrMiss + } + return f, Entry{ + ContentType: m.ContentType, + Size: info.Size(), + StoredAt: storedAt, + }, nil } // Put implements Store. @@ -101,7 +110,7 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea return err } tmpName := tmp.Name() - n, err := io.Copy(tmp, value) + _, err = io.Copy(tmp, value) if err != nil { _ = tmp.Close() _ = os.Remove(tmpName) @@ -117,12 +126,11 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea } storedAt := time.Now() _ = os.Chtimes(dataPath, storedAt, storedAt) - meta := fileMeta{ContentType: contentType, Size: n, StoredAt: storedAt} + meta := fileMeta{ContentType: contentType} mb, _ := json.Marshal(meta) if err := os.WriteFile(metaPath, mb, 0o640); err != nil { return err } - _ = os.Chtimes(metaPath, storedAt, storedAt) if s.MaxAge > 0 || s.MaxBytes > 0 { s.evict() } @@ -154,7 +162,7 @@ func (s *FileStore) evict() { path string metaPath string size int64 - storedAt time.Time + modTime time.Time } var entries []entry now := time.Now() @@ -169,20 +177,20 @@ func (s *FileStore) evict() { if err != nil { return nil } - storedAt := info.ModTime() metaPath := p + ".meta" - if mb, err := os.ReadFile(metaPath); err == nil { - var m fileMeta - if err := json.Unmarshal(mb, &m); err == nil && !m.StoredAt.IsZero() { - storedAt = m.StoredAt + if _, err := os.Stat(metaPath); err != nil { + if errors.Is(err, fs.ErrNotExist) { + _ = os.Remove(p) + return nil } + return nil } - if s.expired(storedAt, now) { + if s.expired(info.ModTime(), now) { _ = os.Remove(p) _ = os.Remove(metaPath) return nil } - entries = append(entries, entry{path: p, metaPath: metaPath, size: info.Size(), storedAt: storedAt}) + entries = append(entries, entry{path: p, metaPath: metaPath, size: info.Size(), modTime: info.ModTime()}) total += info.Size() return nil }) @@ -190,10 +198,10 @@ func (s *FileStore) evict() { return } sort.Slice(entries, func(i, j int) bool { - if entries[i].storedAt.Equal(entries[j].storedAt) { + if entries[i].modTime.Equal(entries[j].modTime) { return entries[i].path < entries[j].path } - return entries[i].storedAt.Before(entries[j].storedAt) + return entries[i].modTime.Before(entries[j].modTime) }) for _, e := range entries { if total <= s.MaxBytes { diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go index ed00900..4df0c06 100644 --- a/internal/cache/file_test.go +++ b/internal/cache/file_test.go @@ -3,7 +3,6 @@ package cache import ( "bytes" "context" - "encoding/json" "errors" "io" "os" @@ -95,20 +94,18 @@ func TestFileStoreEvictsWhenOversize(t *testing.T) { t.Fatal(err) } - deadline := time.Now().Add(2 * time.Second) - for time.Now().Before(deadline) { - _, _, errA := store.Get(context.Background(), "a") - _, _, errB := store.Get(context.Background(), "b") - if errors.Is(errA, ErrMiss) && errB == nil { - return - } - time.Sleep(20 * time.Millisecond) + if _, _, err := store.Get(context.Background(), "a"); !errors.Is(err, ErrMiss) { + t.Fatalf("a err = %v, want miss", err) + } + if rc, _, err := store.Get(context.Background(), "b"); err != nil { + t.Fatalf("b err = %v", err) + } else { + _ = rc.Close() } - t.Fatal("expected oldest entry to be evicted") } func TestFileStoreGetDoesNotRefreshEvictionOrder(t *testing.T) { - store, err := NewFileStore(t.TempDir(), 5) + store, err := NewFileStore(t.TempDir(), 8) if err != nil { t.Fatal(err) } @@ -147,8 +144,8 @@ func TestFileStoreGetDoesNotRefreshEvictionOrder(t *testing.T) { } } -func TestFileStoreEvictsByStoredAt(t *testing.T) { - store, err := NewFileStore(t.TempDir(), 5) +func TestFileStoreEvictsByModTime(t *testing.T) { + store, err := NewFileStore(t.TempDir(), 8) if err != nil { t.Fatal(err) } @@ -160,25 +157,14 @@ func TestFileStoreEvictsByStoredAt(t *testing.T) { t.Fatal(err) } - dataPathA, metaPathA := store.paths("a") + dataPathA, _ := store.paths("a") oldTime := time.Now().Add(-2 * time.Hour) - if err := os.Chtimes(dataPathA, time.Now(), time.Now()); err != nil { - t.Fatal(err) - } - metaA := fileMeta{ContentType: "text/plain", Size: 4, StoredAt: oldTime} - mbA, _ := json.Marshal(metaA) - if err := os.WriteFile(metaPathA, mbA, 0o640); err != nil { + if err := os.Chtimes(dataPathA, oldTime, oldTime); err != nil { t.Fatal(err) } - - dataPathB, metaPathB := store.paths("b") + dataPathB, _ := store.paths("b") newTime := time.Now().Add(-1 * time.Hour) - if err := os.Chtimes(dataPathB, oldTime, oldTime); err != nil { - t.Fatal(err) - } - metaB := fileMeta{ContentType: "text/plain", Size: 4, StoredAt: newTime} - mbB, _ := json.Marshal(metaB) - if err := os.WriteFile(metaPathB, mbB, 0o640); err != nil { + if err := os.Chtimes(dataPathB, newTime, newTime); err != nil { t.Fatal(err) } @@ -237,12 +223,9 @@ func TestFileStoreMaxAgeEvictsOnPut(t *testing.T) { if err := store.Put(context.Background(), "old", "text/plain", strings.NewReader("old")); err != nil { t.Fatal(err) } - dataPath, metaPath := store.paths("old") + dataPath, _ := store.paths("old") oldTime := time.Now().Add(-2 * time.Hour) - _ = os.Chtimes(dataPath, oldTime, oldTime) - meta := fileMeta{ContentType: "text/plain", Size: 3, StoredAt: oldTime} - mb, _ := json.Marshal(meta) - if err := os.WriteFile(metaPath, mb, 0o640); err != nil { + if err := os.Chtimes(dataPath, oldTime, oldTime); err != nil { t.Fatal(err) } From 64119b6315e5cb5abb040f72553daa6e0696db4d Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 11:32:31 +0000 Subject: [PATCH 3/9] Remove derivative cache content-type sidecars --- docs/caching.md | 7 +- internal/cache/file.go | 68 +++++++++++++------ internal/cache/file_test.go | 39 +++++++++++ internal/iiif/image/v3/handler/handler.go | 4 +- .../iiif/image/v3/handler/handler_test.go | 37 ++++++++++ internal/server/server.go | 2 +- 6 files changed, 130 insertions(+), 27 deletions(-) diff --git a/docs/caching.md b/docs/caching.md index 57ac18c..a506ad8 100644 --- a/docs/caching.md +++ b/docs/caching.md @@ -27,10 +27,9 @@ responses are not stored. files under `cache.root`. It is different from `iiif.image.max_derivative_bytes`, which limits one generated response before it can be returned or cached. A cache write can temporarily exceed `cache.max_bytes` -before eviction runs, and metadata sidecar files are not counted toward the -target. When size eviction runs, Triplet removes the oldest derivative payload -files first based on payload file modification time; reads do not refresh cache -age. +before eviction runs. When size eviction runs, Triplet removes the oldest +derivative payload files first based on payload file modification time; reads +do not refresh cache age. `cache.max_age` is based on the derivative payload file modification time, not when it was last requested. When a cached derivative is older than `max_age`, diff --git a/internal/cache/file.go b/internal/cache/file.go index b08f12a..d04e462 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -21,6 +21,7 @@ import ( // characters and still be safe filenames. type FileStore struct { Root string + StoreContentType bool // MaxBytes optionally bounds total cache size; when exceeded, the // oldest payload files are evicted on the next Put based on mtime. @@ -42,7 +43,7 @@ func NewFileStore(root string, maxBytes int64) (*FileStore, error) { if err := os.MkdirAll(abs, 0o750); err != nil { return nil, fmt.Errorf("cache file mkdir: %w", err) } - return &FileStore{Root: abs, MaxBytes: maxBytes}, nil + return &FileStore{Root: abs, StoreContentType: true, MaxBytes: maxBytes}, nil } // NewFileStoreWithMaxAge constructs a FileStore with size and age eviction. @@ -55,6 +56,19 @@ func NewFileStoreWithMaxAge(root string, maxBytes int64, maxAge time.Duration) ( return store, nil } +// NewPayloadFileStoreWithMaxAge constructs a payload-only FileStore. It stores +// content type out of band in the caller and uses payload file metadata for +// age and size accounting. +func NewPayloadFileStoreWithMaxAge(root string, maxBytes int64, maxAge time.Duration) (*FileStore, error) { + store, err := NewFileStore(root, maxBytes) + if err != nil { + return nil, err + } + store.StoreContentType = false + store.MaxAge = maxAge + return store, nil +} + type fileMeta struct { ContentType string `json:"content_type"` } @@ -62,16 +76,20 @@ type fileMeta struct { // Get implements Store. func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, error) { dataPath, metaPath := s.paths(key) - mb, err := os.ReadFile(metaPath) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - return nil, Entry{}, ErrMiss + contentType := "" + if s.StoreContentType { + mb, err := os.ReadFile(metaPath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil, Entry{}, ErrMiss + } + return nil, Entry{}, err } - return nil, Entry{}, err - } - var m fileMeta - if err := json.Unmarshal(mb, &m); err != nil { - return nil, Entry{}, fmt.Errorf("cache meta: %w", err) + var m fileMeta + if err := json.Unmarshal(mb, &m); err != nil { + return nil, Entry{}, fmt.Errorf("cache meta: %w", err) + } + contentType = m.ContentType } f, err := os.Open(dataPath) if err != nil { @@ -93,7 +111,7 @@ func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, er return nil, Entry{}, ErrMiss } return f, Entry{ - ContentType: m.ContentType, + ContentType: contentType, Size: info.Size(), StoredAt: storedAt, }, nil @@ -126,10 +144,12 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea } storedAt := time.Now() _ = os.Chtimes(dataPath, storedAt, storedAt) - meta := fileMeta{ContentType: contentType} - mb, _ := json.Marshal(meta) - if err := os.WriteFile(metaPath, mb, 0o640); err != nil { - return err + if s.StoreContentType { + meta := fileMeta{ContentType: contentType} + mb, _ := json.Marshal(meta) + if err := os.WriteFile(metaPath, mb, 0o640); err != nil { + return err + } } if s.MaxAge > 0 || s.MaxBytes > 0 { s.evict() @@ -178,16 +198,20 @@ func (s *FileStore) evict() { return nil } metaPath := p + ".meta" - if _, err := os.Stat(metaPath); err != nil { - if errors.Is(err, fs.ErrNotExist) { - _ = os.Remove(p) + if s.StoreContentType { + if _, err := os.Stat(metaPath); err != nil { + if errors.Is(err, fs.ErrNotExist) { + _ = os.Remove(p) + return nil + } return nil } - return nil } if s.expired(info.ModTime(), now) { _ = os.Remove(p) - _ = os.Remove(metaPath) + if s.StoreContentType { + _ = os.Remove(metaPath) + } return nil } entries = append(entries, entry{path: p, metaPath: metaPath, size: info.Size(), modTime: info.ModTime()}) @@ -208,7 +232,9 @@ func (s *FileStore) evict() { return } _ = os.Remove(e.path) - _ = os.Remove(e.metaPath) + if s.StoreContentType { + _ = os.Remove(e.metaPath) + } total -= e.size } } diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go index 4df0c06..05c5311 100644 --- a/internal/cache/file_test.go +++ b/internal/cache/file_test.go @@ -55,6 +55,45 @@ func TestFileStorePutGetDelete(t *testing.T) { } } +func TestPayloadFileStorePutGetDelete(t *testing.T) { + store, err := NewPayloadFileStoreWithMaxAge(t.TempDir(), 0, 0) + if err != nil { + t.Fatal(err) + } + + if err := store.Put(context.Background(), "abc", "image/png", strings.NewReader("payload")); err != nil { + t.Fatal(err) + } + + rc, entry, err := store.Get(context.Background(), "abc") + if err != nil { + t.Fatal(err) + } + defer rc.Close() + + b, err := io.ReadAll(rc) + if err != nil { + t.Fatal(err) + } + if string(b) != "payload" { + t.Fatalf("body = %q", string(b)) + } + if entry.ContentType != "" { + t.Fatalf("content-type = %q, want empty", entry.ContentType) + } + if entry.Size != int64(len("payload")) { + t.Fatalf("size = %d", entry.Size) + } + if entry.StoredAt.IsZero() { + t.Fatal("stored_at was zero") + } + + _, metaPath := store.paths("abc") + if _, err := os.Stat(metaPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("meta stat err = %v, want not exist", err) + } +} + func TestFileStoreMissDoesNotCreateKeyDirectory(t *testing.T) { root := t.TempDir() store, err := NewFileStore(root, 0) diff --git a/internal/iiif/image/v3/handler/handler.go b/internal/iiif/image/v3/handler/handler.go index 2a4c9a5..5d060a4 100644 --- a/internal/iiif/image/v3/handler/handler.go +++ b/internal/iiif/image/v3/handler/handler.go @@ -198,6 +198,7 @@ func (h *Handler) serveImage(w http.ResponseWriter, r *http.Request, req parse.R } key, cacheable := derivativeKey(req, meta) etag := "" + contentType := contentTypeForFormat(req.Format) if cacheable { if h.invalidationToken != "" { if version := h.derivativeInvalidationVersion(r.Context(), req.Identifier); version != "" { @@ -215,6 +216,8 @@ func (h *Handler) serveImage(w http.ResponseWriter, r *http.Request, req parse.R defer rc.Close() if entry.ContentType != "" { w.Header().Set("Content-Type", entry.ContentType) + } else { + w.Header().Set("Content-Type", contentType) } if entry.Size > 0 { w.Header().Set("Content-Length", fmt.Sprintf("%d", entry.Size)) @@ -232,7 +235,6 @@ func (h *Handler) serveImage(w http.ResponseWriter, r *http.Request, req parse.R } } - contentType := contentTypeForFormat(req.Format) w.Header().Set("Content-Type", contentType) w.Header().Set("X-Cache", "miss") diff --git a/internal/iiif/image/v3/handler/handler_test.go b/internal/iiif/image/v3/handler/handler_test.go index 966aab6..66b803b 100644 --- a/internal/iiif/image/v3/handler/handler_test.go +++ b/internal/iiif/image/v3/handler/handler_test.go @@ -211,6 +211,43 @@ func TestImageRequestHasCanonicalLink(t *testing.T) { } } +func TestDerivativeCacheHitInfersContentTypeFromRequestFormat(t *testing.T) { + store, err := cache.NewPayloadFileStoreWithMaxAge(t.TempDir(), 0, time.Hour) + if err != nil { + t.Fatal(err) + } + srv, _ := setupTestServerWithCache(t, store) + defer srv.Close() + + imageURL := srv.URL + "/iiif/3/sample.png/full/max/0/default.png" + first, err := http.Get(imageURL) + if err != nil { + t.Fatal(err) + } + defer first.Body.Close() + if first.StatusCode != http.StatusOK { + t.Fatalf("first status = %d", first.StatusCode) + } + if got := first.Header.Get("Content-Type"); got != "image/png" { + t.Fatalf("first Content-Type = %q", got) + } + + second, err := http.Get(imageURL) + if err != nil { + t.Fatal(err) + } + defer second.Body.Close() + if second.StatusCode != http.StatusOK { + t.Fatalf("second status = %d", second.StatusCode) + } + if got := second.Header.Get("X-Cache"); got != "hit" { + t.Fatalf("second X-Cache = %q", got) + } + if got := second.Header.Get("Content-Type"); got != "image/png" { + t.Fatalf("second Content-Type = %q", got) + } +} + func TestUnsupportedSourceUsesClientError(t *testing.T) { root := t.TempDir() if err := os.WriteFile(filepath.Join(root, "bad.png"), []byte("not an image"), 0o600); err != nil { diff --git a/internal/server/server.go b/internal/server/server.go index fb215cf..e7e2d95 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -378,7 +378,7 @@ func buildDerivativeCache(cfg *config.Config) (cache.Store, error) { if cfg.Cache.Root == "" { return cache.Noop{}, nil } - return cache.NewFileStoreWithMaxAge(cfg.Cache.Root, int64(cfg.Cache.MaxBytes), cfg.Cache.MaxAge) + return cache.NewPayloadFileStoreWithMaxAge(cfg.Cache.Root, int64(cfg.Cache.MaxBytes), cfg.Cache.MaxAge) } func buildSourceCache(cfg *config.Config) (cache.Store, error) { From 0707aba0112c18cc6483906a36ed4d0357b2a500 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 07:47:41 -0400 Subject: [PATCH 4/9] remove meta from derivative cache store remove cantaloupe from benchmark runs --- internal/cache/file.go | 2 +- scripts/benchmark-iiif.sh | 100 ++++-------------------------------- scripts/benchmark-report.py | 67 +++++++++++++++--------- 3 files changed, 54 insertions(+), 115 deletions(-) diff --git a/internal/cache/file.go b/internal/cache/file.go index d04e462..63a75ce 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -20,7 +20,7 @@ import ( // metadata in .meta. Keys are hashed (SHA-256) so they can contain any // characters and still be safe filenames. type FileStore struct { - Root string + Root string StoreContentType bool // MaxBytes optionally bounds total cache size; when exceeded, the diff --git a/scripts/benchmark-iiif.sh b/scripts/benchmark-iiif.sh index f72407a..b508650 100755 --- a/scripts/benchmark-iiif.sh +++ b/scripts/benchmark-iiif.sh @@ -15,10 +15,8 @@ CONCURRENCY_LIST="${BENCH_CONCURRENCY_LIST:-}" UNCACHED_CONCURRENCY_LIST="${BENCH_UNCACHED_CONCURRENCY_LIST:-2 4 8}" CACHED_CONCURRENCY_LIST="${BENCH_CACHED_CONCURRENCY_LIST:-8 32 128}" TRIPLET_IMAGE="${BENCH_TRIPLET_IMAGE:-triplet-benchmark:dev}" -CANTALOUPE_IMAGE="${BENCH_CANTALOUPE_IMAGE:-islandora/cantaloupe:main}" SKIP_BUILD="${BENCH_SKIP_BUILD:-0}" TRIPLET_PORT="${BENCH_TRIPLET_PORT:-18080}" -CANTALOUPE_PORT="${BENCH_CANTALOUPE_PORT:-18182}" PASSES="${BENCH_PASSES:-5}" WARMUP_PASSES="${BENCH_WARMUP_PASSES:-1}" CONCURRENCY="${BENCH_CONCURRENCY:-1}" @@ -35,7 +33,6 @@ APPEND_RUN_REPORTS="${BENCH_APPEND_RUN_REPORTS:-1}" NETWORK="triplet-bench-$RUN_ID" TRIPLET_CONTAINER="triplet-bench-triplet-$RUN_ID" -CANTALOUPE_CONTAINER="triplet-bench-cantaloupe-$RUN_ID" mkdir -p "$OUT_DIR/request-lines" "$OUT_DIR/logs" @@ -48,9 +45,8 @@ cleanup() { wait "$STATS_PID" >/dev/null 2>&1 || true fi docker logs "$TRIPLET_CONTAINER" >"$OUT_DIR/logs/triplet.log" 2>&1 || true - docker logs "$CANTALOUPE_CONTAINER" >"$OUT_DIR/logs/cantaloupe.log" 2>&1 || true if [ "$KEEP_CONTAINERS" != "1" ]; then - docker rm -f "$TRIPLET_CONTAINER" "$CANTALOUPE_CONTAINER" >/dev/null 2>&1 || true + docker rm -f "$TRIPLET_CONTAINER" >/dev/null 2>&1 || true docker network rm "$NETWORK" >/dev/null 2>&1 || true fi } @@ -93,33 +89,6 @@ wait_http() { done } -wait_any_http() { - local name="$1" - local url="$2" - local container="${3:-}" - local deadline=$((SECONDS + 120)) - local code - while true; do - code="$(curl -sS -o /dev/null -w '%{http_code}' "$url" 2>/dev/null || true)" - if [ "$code" != "000" ] && [ -n "$code" ]; then - return - fi - if [ -n "$container" ] && [ "$(docker inspect "$container" --format '{{.State.Running}}' 2>/dev/null || true)" = "false" ]; then - echo "$name exited before it became reachable: $url" >&2 - docker logs "$container" >&2 || true - exit 1 - fi - if [ "$SECONDS" -ge "$deadline" ]; then - echo "$name did not become reachable: $url" >&2 - if [ -n "$container" ]; then - docker logs "$container" >&2 || true - fi - exit 1 - fi - sleep 2 - done -} - write_triplet_config() { local cache_config local debug_config="" @@ -179,20 +148,12 @@ EOF collect_stats() { while true; do - for server in triplet cantaloupe; do - local container - if [ "$server" = "triplet" ]; then - container="$TRIPLET_CONTAINER" - else - container="$CANTALOUPE_CONTAINER" - fi - stats="$(docker stats --no-stream --format '{{json .}}' "$container" 2>/dev/null || true)" - if [ -n "$stats" ]; then - printf '{"ts":"%s","server":"%s","container":"%s","stats":%s}\n' \ - "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$server" "$container" "$stats" \ - >>"$OUT_DIR/container-stats.jsonl" - fi - done + stats="$(docker stats --no-stream --format '{{json .}}' "$TRIPLET_CONTAINER" 2>/dev/null || true)" + if [ -n "$stats" ]; then + printf '{"ts":"%s","server":"triplet","container":"%s","stats":%s}\n' \ + "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$TRIPLET_CONTAINER" "$stats" \ + >>"$OUT_DIR/container-stats.jsonl" + fi sleep "$STATS_INTERVAL" done } @@ -208,7 +169,6 @@ write_queue() { continue fi printf '%s\0%s\0%s\0%s\0%s\0%s\0' "triplet" "http://127.0.0.1:$TRIPLET_PORT" "$image" "$pass" "$request_name" "$request_path" >>"$queue" - printf '%s\0%s\0%s\0%s\0%s\0%s\0' "cantaloupe" "http://127.0.0.1:$CANTALOUPE_PORT" "$image" "$pass" "$request_name" "$request_path" >>"$queue" done <"$REQUESTS_FILE" done done @@ -588,21 +548,15 @@ main() { write_triplet_config - mkdir -p "$OUT_DIR/triplet-cache" "$OUT_DIR/cantaloupe-cache" + mkdir -p "$OUT_DIR/triplet-cache" triplet_cache_desc="derivative=false, source=false, vips_operation=false" - cantaloupe_derivative_enabled=false - cantaloupe_derivative= - cantaloupe_cache_desc="client=true, derivative=false, info=true, source=FilesystemCache, worker=false" if [ "$MODE" = "cached" ]; then - mkdir -p "$OUT_DIR/triplet-cache" "$OUT_DIR/cantaloupe-cache" + mkdir -p "$OUT_DIR/triplet-cache" triplet_cache_desc="derivative=true, root=/cache/triplet, source=false, vips_operation=false" - cantaloupe_derivative_enabled=true - cantaloupe_derivative=FilesystemCache - cantaloupe_cache_desc="client=true, derivative=true, info=true, source=FilesystemCache, worker=false" fi cat >"$OUT_DIR/run.json" </dev/null - docker run -d --name "$CANTALOUPE_CONTAINER" --network "$NETWORK" \ - -p "127.0.0.1:$CANTALOUPE_PORT:8182" \ - -v "$IMAGE_DIR:/images:ro" \ - -v "$OUT_DIR/cantaloupe-cache:/cache/cantaloupe" \ - -e CANTALOUPE_FILESYSTEMSOURCE_BASICLOOKUPSTRATEGY_PATH_PREFIX=/images/ \ - -e CANTALOUPE_SOURCE_STATIC=FilesystemSource \ - -e CANTALOUPE_CACHE_CLIENT_ENABLED=true \ - -e CANTALOUPE_CACHE_CLIENT_MAX_AGE=2592000 \ - -e CANTALOUPE_CACHE_CLIENT_MUST_REVALIDATE=false \ - -e CANTALOUPE_CACHE_CLIENT_NO_CACHE=false \ - -e CANTALOUPE_CACHE_CLIENT_NO_STORE=false \ - -e CANTALOUPE_CACHE_CLIENT_NO_TRANSFORM=true \ - -e CANTALOUPE_CACHE_CLIENT_PRIVATE=false \ - -e CANTALOUPE_CACHE_CLIENT_PROXY_REVALIDATE=false \ - -e CANTALOUPE_CACHE_CLIENT_PUBLIC=true \ - -e CANTALOUPE_CACHE_CLIENT_SHARED_MAX_AGE= \ - -e CANTALOUPE_CACHE_SERVER_DERIVATIVE_ENABLED="$cantaloupe_derivative_enabled" \ - -e CANTALOUPE_CACHE_SERVER_DERIVATIVE_TTL_SECONDS=2592000 \ - -e CANTALOUPE_CACHE_SERVER_DERIVATIVE="$cantaloupe_derivative" \ - -e CANTALOUPE_CACHE_SERVER_INFO_ENABLED=true \ - -e CANTALOUPE_CACHE_SERVER_PURGE_MISSING=false \ - -e CANTALOUPE_CACHE_SERVER_RESOLVE_FIRST=false \ - -e CANTALOUPE_CACHE_SERVER_SOURCE_TTL_SECONDS=2592000 \ - -e CANTALOUPE_CACHE_SERVER_SOURCE=FilesystemCache \ - -e CANTALOUPE_CACHE_SERVER_WORKER_ENABLED=false \ - -e CANTALOUPE_CACHE_SERVER_WORKER_INTERVAL=86400 \ - "$CANTALOUPE_IMAGE" >/dev/null - wait_http triplet "http://127.0.0.1:$TRIPLET_PORT/healthz" "$TRIPLET_CONTAINER" - wait_any_http cantaloupe "http://127.0.0.1:$CANTALOUPE_PORT/" "$CANTALOUPE_CONTAINER" - - first_encoded="$(urlencode "${images[0]}")" - if ! curl -fsS -o /dev/null "http://127.0.0.1:$CANTALOUPE_PORT/iiif/3/$first_encoded/info.json"; then - echo "warning: Cantaloupe did not return info.json for ${images[0]}; benchmark will record per-request failures" >&2 - fi export OUT_DIR CURL_TIMEOUT write_worker diff --git a/scripts/benchmark-report.py b/scripts/benchmark-report.py index 78fb423..86b0a0a 100644 --- a/scripts/benchmark-report.py +++ b/scripts/benchmark-report.py @@ -172,6 +172,41 @@ def server_order(rows): return names +def run_rows( + run, + image_count, + request_count, + measured_requests, + expected_per_server, + expected_total, +): + rows = [ + ["Started", run.get("started_at", "-")], + ["Mode", run.get("mode", "-")], + ["Images", str(image_count)], + ["Request types", str(request_count)], + ["Passes", str(run.get("passes", "-"))], + ["Warmup passes", str(run.get("warmup_passes", "-"))], + ["Concurrency", str(run.get("concurrency", "-"))], + ["Measured duration", format_duration(run.get("measured_duration_seconds"))], + ["Measured requests", str(measured_requests)], + ["Expected requests/server", str(expected_per_server)], + ["Expected requests total", str(expected_total)], + ["Stats interval", format_interval(run.get("stats_interval_seconds"))], + ["Triplet image", run.get("triplet_image", "-")], + ["Triplet cache", run.get("triplet_cache", "-")], + ["Triplet color management", run.get("triplet_color_management", "-")], + ["Triplet load access", run.get("triplet_load_access", "-")], + ["pprof", "enabled" if run.get("profile_enabled") else "disabled"], + ] + if run.get("cantaloupe_image"): + rows.insert(13, ["Cantaloupe image", run.get("cantaloupe_image", "-")]) + if run.get("cantaloupe_cache"): + insert_at = 15 if run.get("cantaloupe_image") else 14 + rows.insert(insert_at, ["Cantaloupe cache", run.get("cantaloupe_cache", "-")]) + return [row for row in rows if row[1] is not None] + + def build_report(requests_csv, resource_csv, run_json): rows = read_requests(requests_csv) groups, failures = summarize_requests(rows) @@ -194,30 +229,14 @@ def build_report(requests_csv, resource_csv, run_json): "", table( ["Metric", "Value"], - [ - ["Started", run.get("started_at", "-")], - ["Mode", run.get("mode", "-")], - ["Images", str(len({row["image"] for row in rows}))], - ["Request types", str(len(requests))], - ["Passes", str(run.get("passes", "-"))], - ["Warmup passes", str(run.get("warmup_passes", "-"))], - ["Concurrency", str(run.get("concurrency", "-"))], - [ - "Measured duration", - format_duration(run.get("measured_duration_seconds")), - ], - ["Measured requests", str(measured_requests)], - ["Expected requests/server", str(expected_per_server)], - ["Expected requests total", str(expected_total)], - ["Stats interval", format_interval(run.get("stats_interval_seconds"))], - ["Triplet image", run.get("triplet_image", "-")], - ["Cantaloupe image", run.get("cantaloupe_image", "-")], - ["Triplet cache", run.get("triplet_cache", "-")], - ["Cantaloupe cache", run.get("cantaloupe_cache", "-")], - ["Triplet color management", run.get("triplet_color_management", "-")], - ["Triplet load access", run.get("triplet_load_access", "-")], - ["pprof", "enabled" if run.get("profile_enabled") else "disabled"], - ], + run_rows( + run, + image_count, + len(requests), + measured_requests, + expected_per_server, + expected_total, + ), ), "", "## Request Comparison", From 05b9e22fa900c12ff8cb4d7dd103cd7fd1097079 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 12:01:32 +0000 Subject: [PATCH 5/9] Harden cache eviction against concurrent writes --- internal/cache/file.go | 45 +++++++++++++----------- internal/cache/file_test.go | 70 +++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 20 deletions(-) diff --git a/internal/cache/file.go b/internal/cache/file.go index 63a75ce..55ac100 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -12,6 +12,7 @@ import ( "os" "path/filepath" "sort" + "strings" "sync" "time" ) @@ -20,8 +21,9 @@ import ( // metadata in .meta. Keys are hashed (SHA-256) so they can contain any // characters and still be safe filenames. type FileStore struct { - Root string - StoreContentType bool + Root string + + storeContentType bool // MaxBytes optionally bounds total cache size; when exceeded, the // oldest payload files are evicted on the next Put based on mtime. @@ -34,6 +36,8 @@ type FileStore struct { mu sync.Mutex } +const tempFilePrefix = ".tmp-" + // NewFileStore constructs a FileStore. Root is created if it does not exist. func NewFileStore(root string, maxBytes int64) (*FileStore, error) { abs, err := filepath.Abs(root) @@ -43,7 +47,7 @@ func NewFileStore(root string, maxBytes int64) (*FileStore, error) { if err := os.MkdirAll(abs, 0o750); err != nil { return nil, fmt.Errorf("cache file mkdir: %w", err) } - return &FileStore{Root: abs, StoreContentType: true, MaxBytes: maxBytes}, nil + return &FileStore{Root: abs, storeContentType: true, MaxBytes: maxBytes}, nil } // NewFileStoreWithMaxAge constructs a FileStore with size and age eviction. @@ -64,7 +68,7 @@ func NewPayloadFileStoreWithMaxAge(root string, maxBytes int64, maxAge time.Dura if err != nil { return nil, err } - store.StoreContentType = false + store.storeContentType = false store.MaxAge = maxAge return store, nil } @@ -77,7 +81,7 @@ type fileMeta struct { func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, error) { dataPath, metaPath := s.paths(key) contentType := "" - if s.StoreContentType { + if s.storeContentType { mb, err := os.ReadFile(metaPath) if err != nil { if errors.Is(err, fs.ErrNotExist) { @@ -107,7 +111,9 @@ func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, er if s.expired(storedAt, time.Now()) { _ = f.Close() _ = os.Remove(dataPath) - _ = os.Remove(metaPath) + if s.storeContentType { + _ = os.Remove(metaPath) + } return nil, Entry{}, ErrMiss } return f, Entry{ @@ -142,9 +148,7 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea _ = os.Remove(tmpName) return err } - storedAt := time.Now() - _ = os.Chtimes(dataPath, storedAt, storedAt) - if s.StoreContentType { + if s.storeContentType { meta := fileMeta{ContentType: contentType} mb, _ := json.Marshal(meta) if err := os.WriteFile(metaPath, mb, 0o640); err != nil { @@ -161,7 +165,9 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea func (s *FileStore) Delete(_ context.Context, key string) error { dataPath, metaPath := s.paths(key) _ = os.Remove(dataPath) - _ = os.Remove(metaPath) + if s.storeContentType { + _ = os.Remove(metaPath) + } return nil } @@ -179,10 +185,9 @@ func (s *FileStore) evict() { defer s.mu.Unlock() var total int64 type entry struct { - path string - metaPath string - size int64 - modTime time.Time + path string + size int64 + modTime time.Time } var entries []entry now := time.Now() @@ -190,7 +195,7 @@ func (s *FileStore) evict() { if err != nil || d.IsDir() { return nil } - if filepath.Ext(p) == ".meta" { + if filepath.Ext(p) == ".meta" || strings.HasPrefix(d.Name(), tempFilePrefix) { return nil } info, err := d.Info() @@ -198,7 +203,7 @@ func (s *FileStore) evict() { return nil } metaPath := p + ".meta" - if s.StoreContentType { + if s.storeContentType { if _, err := os.Stat(metaPath); err != nil { if errors.Is(err, fs.ErrNotExist) { _ = os.Remove(p) @@ -209,12 +214,12 @@ func (s *FileStore) evict() { } if s.expired(info.ModTime(), now) { _ = os.Remove(p) - if s.StoreContentType { + if s.storeContentType { _ = os.Remove(metaPath) } return nil } - entries = append(entries, entry{path: p, metaPath: metaPath, size: info.Size(), modTime: info.ModTime()}) + entries = append(entries, entry{path: p, size: info.Size(), modTime: info.ModTime()}) total += info.Size() return nil }) @@ -232,8 +237,8 @@ func (s *FileStore) evict() { return } _ = os.Remove(e.path) - if s.StoreContentType { - _ = os.Remove(e.metaPath) + if s.storeContentType { + _ = os.Remove(e.path + ".meta") } total -= e.size } diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go index 05c5311..50b5ece 100644 --- a/internal/cache/file_test.go +++ b/internal/cache/file_test.go @@ -4,10 +4,12 @@ import ( "bytes" "context" "errors" + "fmt" "io" "os" "path/filepath" "strings" + "sync" "testing" "testing/synctest" "time" @@ -119,6 +121,74 @@ func TestFileStoreMissDoesNotCreateKeyDirectory(t *testing.T) { } } +func TestFileStoreEvictSkipsInFlightTempFiles(t *testing.T) { + store, err := NewFileStore(t.TempDir(), 1) + if err != nil { + t.Fatal(err) + } + + if err := store.Put(context.Background(), "a", "text/plain", strings.NewReader("a")); err != nil { + t.Fatal(err) + } + + tmp, err := os.CreateTemp(store.Root, tempFilePrefix+"*") + if err != nil { + t.Fatal(err) + } + tmpPath := tmp.Name() + if _, err := tmp.WriteString("in-flight"); err != nil { + _ = tmp.Close() + t.Fatal(err) + } + if err := tmp.Close(); err != nil { + t.Fatal(err) + } + + if err := store.Put(context.Background(), "b", "text/plain", strings.NewReader("b")); err != nil { + t.Fatal(err) + } + + if _, err := os.Stat(tmpPath); err != nil { + t.Fatalf("tmp stat err = %v, want exists", err) + } +} + +func TestFileStoreConcurrentPutsWithEviction(t *testing.T) { + t.Parallel() + + store, err := NewFileStore(t.TempDir(), 1) + if err != nil { + t.Fatal(err) + } + + const goroutines = 16 + const putsPerGoroutine = 32 + + var wg sync.WaitGroup + errCh := make(chan error, goroutines*putsPerGoroutine) + + for g := 0; g < goroutines; g++ { + g := g + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < putsPerGoroutine; i++ { + key := fmt.Sprintf("g%02d-k%02d", g, i) + if err := store.Put(context.Background(), key, "text/plain", strings.NewReader("x")); err != nil { + errCh <- fmt.Errorf("put %s: %w", key, err) + } + } + }() + } + + wg.Wait() + close(errCh) + + for err := range errCh { + t.Fatal(err) + } +} + func TestFileStoreEvictsWhenOversize(t *testing.T) { store, err := NewFileStore(t.TempDir(), 5) if err != nil { From 87972cab6ca49dc085214922b91f488c15f9783d Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 12:07:47 +0000 Subject: [PATCH 6/9] Close cache metadata write race --- internal/cache/file.go | 21 ++++++++++----- internal/cache/file_test.go | 51 ++++++++++++++++++------------------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/internal/cache/file.go b/internal/cache/file.go index 55ac100..7f59675 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -17,9 +17,10 @@ import ( "time" ) -// FileStore stores cache entries as files under Root. Bytes go in , -// metadata in .meta. Keys are hashed (SHA-256) so they can contain any -// characters and still be safe filenames. +// FileStore stores cache entries as files under Root. Bytes go in . +// When storeContentType is enabled, metadata goes in .meta. Keys are +// hashed (SHA-256) so they can contain any characters and still be safe +// filenames. type FileStore struct { Root string @@ -144,16 +145,22 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea _ = os.Remove(tmpName) return err } - if err := os.Rename(tmpName, dataPath); err != nil { - _ = os.Remove(tmpName) - return err - } if s.storeContentType { + s.mu.Lock() + defer s.mu.Unlock() + if err := os.Rename(tmpName, dataPath); err != nil { + _ = os.Remove(tmpName) + return err + } meta := fileMeta{ContentType: contentType} mb, _ := json.Marshal(meta) if err := os.WriteFile(metaPath, mb, 0o640); err != nil { + _ = os.Remove(dataPath) return err } + } else if err := os.Rename(tmpName, dataPath); err != nil { + _ = os.Remove(tmpName) + return err } if s.MaxAge > 0 || s.MaxBytes > 0 { s.evict() diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go index 50b5ece..80cfd39 100644 --- a/internal/cache/file_test.go +++ b/internal/cache/file_test.go @@ -11,7 +11,6 @@ import ( "strings" "sync" "testing" - "testing/synctest" "time" ) @@ -297,31 +296,31 @@ func TestFileStoreEvictsByModTime(t *testing.T) { } func TestFileStoreMaxAgeExpiresOnGet(t *testing.T) { - synctest.Test(t, func(t *testing.T) { - store, err := NewFileStoreWithMaxAge(t.TempDir(), 0, time.Nanosecond) - if err != nil { - t.Fatal(err) - } - if err := store.Put(context.Background(), "old", "text/plain", strings.NewReader("payload")); err != nil { - t.Fatal(err) - } - - time.Sleep(time.Millisecond) - synctest.Wait() - - _, _, err = store.Get(context.Background(), "old") - if !errors.Is(err, ErrMiss) { - t.Fatalf("err = %v, want miss", err) - } - - dataPath, metaPath := store.paths("old") - if _, err := os.Stat(dataPath); !errors.Is(err, os.ErrNotExist) { - t.Fatalf("data stat err = %v, want not exist", err) - } - if _, err := os.Stat(metaPath); !errors.Is(err, os.ErrNotExist) { - t.Fatalf("meta stat err = %v, want not exist", err) - } - }) + store, err := NewFileStoreWithMaxAge(t.TempDir(), 0, time.Hour) + if err != nil { + t.Fatal(err) + } + if err := store.Put(context.Background(), "old", "text/plain", strings.NewReader("payload")); err != nil { + t.Fatal(err) + } + + dataPath, metaPath := store.paths("old") + oldTime := time.Now().Add(-2 * time.Hour) + if err := os.Chtimes(dataPath, oldTime, oldTime); err != nil { + t.Fatal(err) + } + + _, _, err = store.Get(context.Background(), "old") + if !errors.Is(err, ErrMiss) { + t.Fatalf("err = %v, want miss", err) + } + + if _, err := os.Stat(dataPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("data stat err = %v, want not exist", err) + } + if _, err := os.Stat(metaPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("meta stat err = %v, want not exist", err) + } } func TestFileStoreMaxAgeEvictsOnPut(t *testing.T) { From 44f7bdb3fd76619658233a31e9cf1bd34aba2abf Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 12:10:51 +0000 Subject: [PATCH 7/9] Refactor cache metadata install path --- internal/cache/file.go | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/internal/cache/file.go b/internal/cache/file.go index 7f59675..db6daed 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -146,16 +146,7 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea return err } if s.storeContentType { - s.mu.Lock() - defer s.mu.Unlock() - if err := os.Rename(tmpName, dataPath); err != nil { - _ = os.Remove(tmpName) - return err - } - meta := fileMeta{ContentType: contentType} - mb, _ := json.Marshal(meta) - if err := os.WriteFile(metaPath, mb, 0o640); err != nil { - _ = os.Remove(dataPath) + if err := s.installWithMeta(tmpName, dataPath, metaPath, contentType); err != nil { return err } } else if err := os.Rename(tmpName, dataPath); err != nil { @@ -168,6 +159,23 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea return nil } +func (s *FileStore) installWithMeta(tmpName, dataPath, metaPath, contentType string) error { + s.mu.Lock() + defer s.mu.Unlock() + + if err := os.Rename(tmpName, dataPath); err != nil { + _ = os.Remove(tmpName) + return err + } + meta := fileMeta{ContentType: contentType} + mb, _ := json.Marshal(meta) + if err := os.WriteFile(metaPath, mb, 0o640); err != nil { + _ = os.Remove(dataPath) + return err + } + return nil +} + // Delete implements Store. func (s *FileStore) Delete(_ context.Context, key string) error { dataPath, metaPath := s.paths(key) From 97cfc504afd4adb7a90858615f2c0775b5cbc182 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 08:12:16 -0400 Subject: [PATCH 8/9] cancel on new push --- .github/workflows/build-push.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build-push.yaml b/.github/workflows/build-push.yaml index d1f6bd6..2e5dfb6 100644 --- a/.github/workflows/build-push.yaml +++ b/.github/workflows/build-push.yaml @@ -4,6 +4,10 @@ on: push: workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true + jobs: lint-test: runs-on: ubuntu-24.04 From bb19b82d4649566a036acb7e4bba1d37c0286646 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 2 May 2026 08:15:08 -0400 Subject: [PATCH 9/9] print less on benchmarks --- scripts/benchmark-iiif.sh | 29 ++++++++++++++++------------- scripts/benchmark-stats-summary.py | 1 - 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/scripts/benchmark-iiif.sh b/scripts/benchmark-iiif.sh index b508650..a3f41d5 100755 --- a/scripts/benchmark-iiif.sh +++ b/scripts/benchmark-iiif.sh @@ -273,9 +273,9 @@ run_matrix() { if [ "$APPEND_RUN_REPORTS" = "1" ]; then append_matrix_reports "$index" fi - echo "Benchmark matrix complete: $OUT_DIR" - echo "Report: $index" - cat "$index" + if [ "$PRINT_REPORT" = "1" ]; then + python3 "$ROOT_DIR/scripts/extract-benchmark-tldr.py" "$index" + fi } concurrency_list_for_mode() { @@ -333,6 +333,14 @@ def fmt_ms(value): def fmt_cpu_ms(value): return "-" if value is None else f"{value * 1000:.2f}" +def fmt_s(value): + return "-" if value is None else f"{value:.2f}" + +def fmt_rate_per_s(count, duration): + if not count or not duration: + return "-" + return f"{count / duration:.1f}" + def fmt_size(value): if value is None: return "-" @@ -413,7 +421,8 @@ for run_json in sorted(out_root.glob(f"{run_id}-*/run.json")): run.get("mode", "-"), str(run.get("concurrency", "-")), fmt_rate(triplet["ok"], triplet["total"]), - fmt_ms(statistics.median(triplet_times) if triplet_times else None), + fmt_s(duration if duration > 0 else None), + fmt_rate_per_s(triplet["ok"], duration), fmt_ms(percentile(triplet_times, 0.95)), fmt_ms(percentile(triplet_times, 0.99)), fmt_cpu_ms(cpu_per_request), @@ -449,8 +458,8 @@ if summary_rows: "", f"Triplet image: `{', '.join(sorted(triplet_images))}`", "", - "| Mode | Concurrency | Triplet OK | Median ms | p95 ms | p99 ms | CPU ms/req | Max MiB |", - "| --- | ---: | --- | ---: | ---: | ---: | ---: | ---: |", + "| Mode | Concurrency | Triplet OK | Duration s | Req/s | p95 ms | p99 ms | CPU ms/req | Max MiB |", + "| --- | ---: | --- | ---: | ---: | ---: | ---: | ---: | ---: |", ] for item in sorted(summary_rows, key=sort_key): lines.append("| " + " | ".join(item["row"]) + " |") @@ -650,14 +659,8 @@ PY python3 "$ROOT_DIR/scripts/benchmark-stats-summary.py" "$OUT_DIR/container-stats.jsonl" "$OUT_DIR/resource-summary.csv" python3 "$ROOT_DIR/scripts/benchmark-report.py" "$OUT_DIR/requests.csv" "$OUT_DIR/resource-summary.csv" "$OUT_DIR/run.json" "$OUT_DIR/report.md" if [ "$PRINT_REPORT" = "1" ]; then - cat "$OUT_DIR/report.md" + python3 "$ROOT_DIR/scripts/extract-benchmark-tldr.py" "$OUT_DIR/report.md" fi - echo "Benchmark complete: $OUT_DIR" - echo "Request timings: $OUT_DIR/requests.csv" - echo "Summary: $OUT_DIR/summary.csv" - echo "Resource summary: $OUT_DIR/resource-summary.csv" - echo "Report: $OUT_DIR/report.md" - echo "Container stats: $OUT_DIR/container-stats.jsonl" } main "$@" diff --git a/scripts/benchmark-stats-summary.py b/scripts/benchmark-stats-summary.py index bcbe3bf..f9364d9 100644 --- a/scripts/benchmark-stats-summary.py +++ b/scripts/benchmark-stats-summary.py @@ -6,7 +6,6 @@ import sys from collections import defaultdict - UNITS = { "B": 1, "KiB": 1024,