diff --git a/.editorconfig b/.editorconfig index cf71165..fca4537 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,18 +8,12 @@ indent_size = 4 insert_final_newline = true trim_trailing_whitespace = true -[*.zig] +[*.{zig,py}] max_line_length = 100 [*.md] -max_line_length = 120 +max_line_length = 150 trim_trailing_whitespace = false -[*.sh] +[*.{yml,yaml,json}] indent_size = 2 - -[*.{yml,yaml}] -indent_size = 2 - -[*.py] -max_line_length = 100 diff --git a/.github/workflows/benches.yml b/.github/workflows/benches.yml index b45c3bd..4c84ba2 100644 --- a/.github/workflows/benches.yml +++ b/.github/workflows/benches.yml @@ -3,6 +3,8 @@ name: Run Benchmarks on: workflow_dispatch: push: + branches: + - develop tags: - 'v*' pull_request: @@ -23,7 +25,7 @@ jobs: - name: Install Zig uses: goto-bus-stop/setup-zig@v2 with: - version: '0.15.2' + version: '0.16.0' - name: Install Dependencies run: | diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b635e08..bdeedd4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -19,7 +19,7 @@ jobs: - name: Install Zig uses: goto-bus-stop/setup-zig@v2 with: - version: '0.15.2' + version: '0.16.0' - name: Install System Dependencies run: | diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml deleted file mode 100644 index d84de84..0000000 --- a/.github/workflows/lints.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Run Linter Checks - -on: - workflow_dispatch: - push: - tags: - - 'v*' - pull_request: - branches: - - main - -permissions: - contents: read - -jobs: - lints: - runs-on: ubuntu-latest - - steps: - - name: Checkout Code - uses: actions/checkout@v4 - - - name: Install Zig - uses: goto-bus-stop/setup-zig@v2 - with: - version: '0.15.2' - - - name: Install Dependencies - run: | - sudo apt-get update - sudo apt-get install -y make - - - name: Run Linters - run: make lint diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5ee09eb..76dd4b6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,32 +3,55 @@ name: Run Tests on: workflow_dispatch: push: + branches: + - develop tags: - 'v*' pull_request: branches: - main +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read jobs: tests: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + zig-url: https://ziglang.org/download/0.16.0/zig-x86_64-linux-0.16.0.tar.xz + zig-dir: zig-x86_64-linux-0.16.0 + - os: macos-latest + zig-url: https://ziglang.org/download/0.16.0/zig-aarch64-macos-0.16.0.tar.xz + zig-dir: zig-aarch64-macos-0.16.0 + - os: windows-latest + zig-url: https://ziglang.org/download/0.16.0/zig-x86_64-windows-0.16.0.zip + zig-dir: zig-x86_64-windows-0.16.0 steps: - - name: Checkout Repository + - name: Checkout repository uses: actions/checkout@v4 - - name: Install Zig - uses: goto-bus-stop/setup-zig@v2 - with: - version: '0.15.2' + - name: Install Zig 0.16.0 (Unix) + if: runner.os != 'Windows' + run: | + curl -sSfL ${{ matrix.zig-url }} | tar -xJ + echo "$PWD/${{ matrix.zig-dir }}" >> "$GITHUB_PATH" - - name: Install Dependencies + - name: Install Zig 0.16.0 (Windows) + if: runner.os == 'Windows' + shell: pwsh run: | - sudo apt-get update - sudo apt-get install -y make + Invoke-WebRequest -Uri "${{ matrix.zig-url }}" -OutFile zig.zip + Expand-Archive zig.zip -DestinationPath . + echo "$PWD\${{ matrix.zig-dir }}" | Out-File -Append -FilePath $env:GITHUB_PATH - - name: Run the Tests - run: make test + - name: Run tests + run: zig build test --summary all diff --git a/.gitignore b/.gitignore index fde231c..1420a51 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,5 @@ docs/api/ *.dll *.exe latest +.claude/ +.codex diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..734247d --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,155 @@ +# AGENTS.md + +This file provides guidance to coding agents collaborating on this repository. + +## Mission + +Ordered is a sorted-collection library for Zig. It provides in-memory data structures that keep their elements sorted: +two set types (value-only) and four map types (key-value), all exposing a small, uniform API for insertion, lookup, +removal, and in-order iteration. +Priorities, in order: + +1. Correctness of ordering, iteration, and memory management across insertions and removals. +2. Minimal public API that is consistent across every container type. +3. Zero non-Zig dependencies, maintainable, and well-tested code. +4. Cross-platform support (Linux, macOS, and Windows). + +## Core Rules + +- Use English for code, comments, docs, and tests. +- Prefer small, focused changes over large refactoring. +- Add comments only when they clarify non-obvious behavior. +- Do not add features, error handling, or abstractions beyond what is needed for the current task. +- Keep the project dependency-free: no external Zig packages or C libraries unless explicitly agreed. + +## Writing Style + +- Use Oxford commas in inline lists: "a, b, and c" not "a, b, c". +- Do not use em dashes. Restructure the sentence, or use a colon or semicolon instead. +- Avoid colorful adjectives and adverbs. Write "TCP proxy" not "lightweight TCP proxy", "scoring components" not "transparent scoring components". +- Use noun phrases for checklist items, not imperative verbs. Write "redundant index detection" not "detect redundant indexes". +- Headings in Markdown files must be in the title case: "Build from Source" not "Build from source". Minor words (a, an, the, and, but, or, for, in, + on, at, to, by, of, is, are, was, were, be) stay lowercase unless they are the first word. + +## Repository Layout + +- `src/lib.zig`: Public API entry point. Re-exports `SortedSet`, `RedBlackTreeSet`, `BTreeMap`, `SkipListMap`, `TrieMap`, and `CartesianTreeMap`. +- `src/ordered/sorted_set.zig`: `SortedSet` (insertion-sorted `std.ArrayList` backed by a linear scan for insert and removal). +- `src/ordered/red_black_tree_set.zig`: `RedBlackTreeSet` (self-balancing BST; takes an explicit three-way comparison function, consistent with the other generic-key containers). +- `src/ordered/btree_map.zig`: `BTreeMap` (cache-friendly B-tree with configurable branching factor). +- `src/ordered/skip_list_map.zig`: `SkipListMap` (probabilistic skip list with a per-instance PRNG). +- `src/ordered/trie_map.zig`: `TrieMap` (prefix tree, specialised for `[]const u8` keys). +- `src/ordered/cartesian_tree_map.zig`: `CartesianTreeMap` (treap combining BST ordering with max-heap priorities; takes an explicit key-comparison function). +- `examples/`: Self-contained example programs (`e1_btree_map.zig` through `e6_cartesian_tree_map.zig`) built as executables via `build.zig`. +- `benches/`: Benchmark programs (`b1_btree_map.zig` through `b6_cartesian_tree_map.zig`) built in `ReleaseFast`. +- `benches/util/timer.zig`: Internal compatibility shim for the removed `std.time.Timer`, backed by `std.Io.Timestamp`. +- `.github/workflows/`: CI workflows (`tests.yml` for unit tests on Linux, macOS, and Windows, `docs.yml`, `lints.yml`, and `benches.yml`). +- `build.zig` / `build.zig.zon`: Zig build configuration and package metadata. +- `Makefile`: GNU Make wrapper around `zig build` targets. +- `docs/`: Generated API docs land in `docs/api/` (produced by `make docs`). + +## Architecture + +### Common API Across Containers + +Every map exposes `init(allocator)`, `deinit()`, `count()`, `contains(key)`, `get(key)`, `getPtr(key)`, `put(key, value)`, +`remove(key)`, and `iterator()`. Every set exposes the same shape with value-only variants. New containers should +adopt this surface instead of inventing new method names. + +### Iteration + +Iterators return entries in the natural sort order of the underlying structure. Modifying a container during +iteration is undefined behaviour; each container's docstring states this explicitly. New iterators should follow +the same convention and document the contract. + +### Randomised Containers + +`SkipListMap` and `CartesianTreeMap` use randomness internally (levels and priorities, respectively). Each instance +owns its own `std.Random.DefaultPrng`, seeded at `init` from an ASLR-derived hash of a stack address, which is +sufficient randomness for skip-list level selection and treap priority generation. Callers do not need to provide +a seed. Tests that require deterministic behaviour should use the explicit `*WithPriority` / level-setting API +on the container rather than seeding the PRNG directly. + +### Public API Surface + +Everything re-exported from `src/lib.zig` is part of the public API. Changes to names, signatures, or semantics +there are breaking. The rest of `src/ordered/` is internal and may be refactored freely as long as the public +surface and its behavior are preserved. + +### Dependencies + +Ordered has **no external Zig or C dependencies**. +The only `build.zig.zon` entries should be Ordered itself. +Please do not add dependencies without prior discussion. + +## Zig Conventions + +- Zig version: 0.16.0 (as declared in `build.zig.zon` and the Makefile's `ZIG_LOCAL` path). +- Formatting is enforced by `zig fmt`. Run `make format` before committing. +- Naming follows Zig standard-library conventions: `camelCase` for functions (e.g. `getPtr`, `keysWithPrefix`), `snake_case` for local variables and + struct fields, `PascalCase` for types and structs, and `SCREAMING_SNAKE_CASE` for top-level compile-time constants. + +## Required Validation + +Run the relevant targets for any change: + +| Target | Command | What It Runs | +|------------------|-------------------------------------|-------------------------------------------------------------------| +| Unit tests | `make test` | Inline `test` blocks across `src/lib.zig` and `src/ordered/*.zig` | +| Lint | `make lint` | Checks Zig formatting with `zig fmt --check src examples` | +| Single example | `make run EXAMPLE=e1_btree_map` | Builds and runs one example program | +| All examples | `make run` | Builds and runs every example under `examples/` | +| Single benchmark | `make bench BENCHMARK=b1_btree_map` | Builds (ReleaseFast) and runs one benchmark program | +| All benchmarks | `make bench` | Builds and runs every benchmark under `benches/` | +| Docs | `make docs` | Generates API docs into `docs/api` | +| Everything | `make all` | Runs `build`, `test`, `lint`, and `docs` | + +## First Contribution Flow + +1. Read the relevant module under `src/ordered/` (often one container file per change). +2. Implement the smallest change that covers the requirement. +3. Add or update inline `test` blocks in the changed Zig module to cover the new behavior. +4. Run `make test` and `make lint`. +5. If the change could affect performance, also run the corresponding benchmark with `make bench BENCHMARK=bN_*`. + +Good first tasks: + +- New example under `examples/` demonstrating a container method, listed in `examples/README.md`. +- Additional inline `test` block covering a boundary case (empty container, single element, duplicate key, unicode key). +- Performance improvement in an existing container, paired with benchmark output before/after. +- Documentation refinement in a container module's top-level docstring. + +## Testing Expectations + +- Unit and regression tests live as inline `test` blocks in the module they cover (`src/lib.zig` and `src/ordered/*.zig`). There is no separate + `tests/` directory. +- Tests are discovered automatically via `std.testing.refAllDecls(@This())` in `src/lib.zig`, so new `test` blocks only need to live in a module that + is reachable from `lib.zig`. +- Every new public function or container branch must ship with at least one `test` block that exercises it, including the error paths where + applicable. +- Memory tests should use `std.testing.allocator` so leaks are caught automatically. +- No public API change is complete without a test covering the new or changed behavior. + +## Change Design Checklist + +Before coding: + +1. Modules affected by the change (which container file, or `lib.zig` if the public surface moves). +2. Whether the change alters ordering, iteration, or removal semantics, and therefore needs explicit test coverage. +3. Public API impact, i.e. whether the change adds to or alters anything re-exported from `src/lib.zig`, and is therefore additive or breaking. +4. Cross-platform implications, especially for anything that touches timing, the filesystem, or non-deterministic ordering. + +Before submitting: + +1. `make test` passes. +2. `make lint` passes. +3. `make bench` still succeeds for any benchmark whose container was touched, with a note on whether performance moved. +4. Docs updated (`make docs`) if the public API surface changed, and `README.md` ticked or updated if a container was added or removed. + +## Commit and PR Hygiene + +- Keep commits scoped to one logical change. +- PR descriptions should include: + 1. Behavioral change summary. + 2. Tests added or updated. + 3. Whether benchmarks were run locally (yes/no), and on which OS, with before/after numbers when relevant. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index afe7029..0928106 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,6 +28,9 @@ would like to work on or if it has already been resolved. ### Development Workflow +> [!IMPORTANT] +> If you're using an AI-assisted coding tool like Claude Code or Codex, make sure the AI follows the instructions in the [AGENTS.md](AGENTS.md) file. + #### Prerequisites Install GNU Make on your system if it's not already installed. diff --git a/Makefile b/Makefile index cacd24e..dffe7a0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ # ################################################################################ # # Configuration and Variables # ################################################################################ -ZIG ?= $(shell which zig || echo ~/.local/share/zig/0.15.2/zig) +ZIG_LOCAL := $(HOME)/.local/share/zig/0.16.0/zig +ZIG ?= $(shell test -x $(ZIG_LOCAL) && echo $(ZIG_LOCAL) || which zig) BUILD_TYPE ?= Debug BUILD_OPTS = -Doptimize=$(BUILD_TYPE) JOBS ?= $(shell nproc || echo 2) diff --git a/README.md b/README.md index 797f35f..091fde4 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,8 @@ [![Tests](https://img.shields.io/github/actions/workflow/status/CogitatorTech/ordered/tests.yml?label=tests&style=flat&labelColor=282c34&logo=github)](https://github.com/CogitatorTech/ordered/actions/workflows/tests.yml) [![Benchmarks](https://img.shields.io/github/actions/workflow/status/CogitatorTech/ordered/benches.yml?label=benchmarks&style=flat&labelColor=282c34&logo=github)](https://github.com/CogitatorTech/ordered/actions/workflows/benches.yml) -[![CodeFactor](https://img.shields.io/codefactor/grade/github/CogitatorTech/ordered?label=quality&style=flat&labelColor=282c34&logo=codefactor)](https://www.codefactor.io/repository/github/CogitatorTech/ordered) -[![Zig Version](https://img.shields.io/badge/Zig-0.15.2-orange?logo=zig&labelColor=282c34)](https://ziglang.org/download/) +[![Zig Version](https://img.shields.io/badge/Zig-0.16.0-orange?logo=zig&labelColor=282c34)](https://ziglang.org/download/) [![Release](https://img.shields.io/github/release/CogitatorTech/ordered.svg?label=release&style=flat&labelColor=282c34&logo=github)](https://github.com/CogitatorTech/ordered/releases/latest) -
[![Docs](https://img.shields.io/badge/docs-read-blue?style=flat&labelColor=282c34&logo=read-the-docs)](https://CogitatorTech.github.io/ordered/) [![Examples](https://img.shields.io/badge/examples-view-green?style=flat&labelColor=282c34&logo=zig)](https://github.com/CogitatorTech/ordered/tree/main/examples) [![License](https://img.shields.io/badge/license-MIT-007ec6?label=license&style=flat&labelColor=282c34&logo=open-source-initiative)](https://github.com/CogitatorTech/ordered/blob/main/LICENSE) @@ -78,9 +76,18 @@ Run the following command in the root directory of your project to download Orde zig fetch --save=ordered "https://github.com/CogitatorTech/ordered/archive/.tar.gz" ``` -Replace `` with the desired branch or release tag, like `main` (for the development version) or `v0.1.0`. +Replace `` with the desired branch or release tag, like `main` (for the development version) or `v0.3.0`. This command will download Ordered and add it to Zig's global cache and update your project's `build.zig.zon` file. +Zig version supported by each tagged release: + +| Zig | Ordered Tags | +|----------|--------------| +| `0.16.0` | `v0.2.x` | +| `0.15.2` | `v0.1.0` | + +The `main` branch normally is developed and build using the latest (non-developmental) Zig release. + #### Adding to Build Script Next, modify your `build.zig` file to make Ordered available to your build target as a module. diff --git a/benches/b1_btree_map.zig b/benches/b1_btree_map.zig index 2ab171a..7a0f4ad 100644 --- a/benches/b1_btree_map.zig +++ b/benches/b1_btree_map.zig @@ -1,9 +1,9 @@ const std = @import("std"); const ordered = @import("ordered"); -const Timer = std.time.Timer; +const Timer = @import("util/timer.zig").Timer; pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa = std.heap.DebugAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); diff --git a/benches/b2_sorted_set.zig b/benches/b2_sorted_set.zig index 8adabdd..32cbc09 100644 --- a/benches/b2_sorted_set.zig +++ b/benches/b2_sorted_set.zig @@ -1,9 +1,9 @@ const std = @import("std"); const ordered = @import("ordered"); -const Timer = std.time.Timer; +const Timer = @import("util/timer.zig").Timer; pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa = std.heap.DebugAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); diff --git a/benches/b3_red_black_tree_set.zig b/benches/b3_red_black_tree_set.zig index 22a6419..f1eb5e0 100644 --- a/benches/b3_red_black_tree_set.zig +++ b/benches/b3_red_black_tree_set.zig @@ -1,9 +1,9 @@ const std = @import("std"); const ordered = @import("ordered"); -const Timer = std.time.Timer; +const Timer = @import("util/timer.zig").Timer; pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa = std.heap.DebugAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); @@ -20,15 +20,12 @@ pub fn main() !void { } } -const Context = struct { - pub fn lessThan(self: @This(), a: i32, b: i32) bool { - _ = self; - return a < b; - } -}; +fn i32Compare(lhs: i32, rhs: i32) std.math.Order { + return std.math.order(lhs, rhs); +} fn benchmarkInsert(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.RedBlackTreeSet(i32, Context).init(allocator, Context{}); + var tree = ordered.RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); var timer = try Timer.start(); @@ -50,7 +47,7 @@ fn benchmarkInsert(allocator: std.mem.Allocator, size: usize) !void { } fn benchmarkFind(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.RedBlackTreeSet(i32, Context).init(allocator, Context{}); + var tree = ordered.RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 0; @@ -79,7 +76,7 @@ fn benchmarkFind(allocator: std.mem.Allocator, size: usize) !void { } fn benchmarkRemove(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.RedBlackTreeSet(i32, Context).init(allocator, Context{}); + var tree = ordered.RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 0; @@ -106,7 +103,7 @@ fn benchmarkRemove(allocator: std.mem.Allocator, size: usize) !void { } fn benchmarkIterator(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.RedBlackTreeSet(i32, Context).init(allocator, Context{}); + var tree = ordered.RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 0; diff --git a/benches/b4_skip_list_map.zig b/benches/b4_skip_list_map.zig index 098f06b..451b824 100644 --- a/benches/b4_skip_list_map.zig +++ b/benches/b4_skip_list_map.zig @@ -1,9 +1,9 @@ const std = @import("std"); const ordered = @import("ordered"); -const Timer = std.time.Timer; +const Timer = @import("util/timer.zig").Timer; pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa = std.heap.DebugAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); diff --git a/benches/b5_trie_map.zig b/benches/b5_trie_map.zig index f63b490..db7dc88 100644 --- a/benches/b5_trie_map.zig +++ b/benches/b5_trie_map.zig @@ -1,9 +1,9 @@ const std = @import("std"); const ordered = @import("ordered"); -const Timer = std.time.Timer; +const Timer = @import("util/timer.zig").Timer; pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa = std.heap.DebugAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); diff --git a/benches/b6_cartesian_tree_map.zig b/benches/b6_cartesian_tree_map.zig index 0cd77a1..da48cfe 100644 --- a/benches/b6_cartesian_tree_map.zig +++ b/benches/b6_cartesian_tree_map.zig @@ -1,9 +1,13 @@ const std = @import("std"); const ordered = @import("ordered"); -const Timer = std.time.Timer; +const Timer = @import("util/timer.zig").Timer; + +fn i32Compare(lhs: i32, rhs: i32) std.math.Order { + return std.math.order(lhs, rhs); +} pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa = std.heap.DebugAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); @@ -21,7 +25,7 @@ pub fn main() !void { } fn benchmarkPut(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.CartesianTreeMap(i32, i32).init(allocator); + var tree = ordered.CartesianTreeMap(i32, i32, i32Compare).init(allocator); defer tree.deinit(); var timer = try Timer.start(); @@ -43,7 +47,7 @@ fn benchmarkPut(allocator: std.mem.Allocator, size: usize) !void { } fn benchmarkGet(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.CartesianTreeMap(i32, i32).init(allocator); + var tree = ordered.CartesianTreeMap(i32, i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 0; @@ -72,7 +76,7 @@ fn benchmarkGet(allocator: std.mem.Allocator, size: usize) !void { } fn benchmarkRemove(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.CartesianTreeMap(i32, i32).init(allocator); + var tree = ordered.CartesianTreeMap(i32, i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 0; @@ -99,7 +103,7 @@ fn benchmarkRemove(allocator: std.mem.Allocator, size: usize) !void { } fn benchmarkIterator(allocator: std.mem.Allocator, size: usize) !void { - var tree = ordered.CartesianTreeMap(i32, i32).init(allocator); + var tree = ordered.CartesianTreeMap(i32, i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 0; diff --git a/benches/util/timer.zig b/benches/util/timer.zig new file mode 100644 index 0000000..5a3f9f9 --- /dev/null +++ b/benches/util/timer.zig @@ -0,0 +1,31 @@ +//! A minimal replacement for `std.time.Timer`, which was removed from the +//! standard library in Zig 0.16. Backed by `std.Io.Timestamp` on the +//! monotonic `.real` clock. API-compatible with the subset of the old +//! Timer that the ordered benchmarks use (`start`, `read`, `lap`). +const std = @import("std"); + +pub const Timer = struct { + start_ns: i96, + + pub fn start() !Timer { + return .{ .start_ns = Timer.now() }; + } + + /// Nanoseconds since the last `start`/`lap`. Does not modify state. + pub fn read(self: *const Timer) u64 { + return @intCast(Timer.now() - self.start_ns); + } + + /// Returns nanoseconds since the last `start`/`lap` and resets the + /// internal start to the current instant. + pub fn lap(self: *Timer) u64 { + const now_ns = Timer.now(); + const elapsed = now_ns - self.start_ns; + self.start_ns = now_ns; + return @intCast(elapsed); + } + + fn now() i96 { + return std.Io.Timestamp.now(std.Options.debug_io, .real).nanoseconds; + } +}; diff --git a/build.zig b/build.zig index ea51958..d3af477 100644 --- a/build.zig +++ b/build.zig @@ -1,9 +1,10 @@ const std = @import("std"); -const fs = std.fs; pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + const io = b.graph.io; + const lib_source = b.path("src/lib.zig"); const lib_module = b.addModule("ordered", .{ .root_source_file = lib_source, @@ -15,19 +16,27 @@ pub fn build(b: *std.Build) void { .root_module = lib_module, }); b.installArtifact(lib); + + // --- Docs Setup --- const docs_step = b.step("docs", "Generate API documentation"); const doc_install_path = "docs/api"; + + // Zig's `-femit-docs=` writes the leaf dir but does not create + // intermediate parents, and git does not track empty directories, so a + // fresh checkout may have no `docs/` at all. Create it portably here + // (idempotent: createDirPath is a no-op when the directory already exists). + const ensure_docs_dir = EnsureDirStep.create(b, "docs"); const gen_docs_cmd = b.addSystemCommand(&[_][]const u8{ b.graph.zig_exe, "build-lib", "src/lib.zig", "-femit-docs=" ++ doc_install_path, + "-fno-emit-bin", }); - const mkdir_cmd = b.addSystemCommand(&[_][]const u8{ - "mkdir", "-p", doc_install_path, - }); - gen_docs_cmd.step.dependOn(&mkdir_cmd.step); + gen_docs_cmd.step.dependOn(&ensure_docs_dir.step); docs_step.dependOn(&gen_docs_cmd.step); + + // --- Tests --- const lib_unit_tests = b.addTest(.{ .root_module = lib_module, }); @@ -35,20 +44,19 @@ pub fn build(b: *std.Build) void { const test_step = b.step("test", "Run unit tests"); test_step.dependOn(&run_lib_unit_tests.step); - // Build examples + // --- Examples --- const examples_path = "examples"; - examples_blk: { - var examples_dir = fs.cwd().openDir(examples_path, .{ .iterate = true }) catch |err| { - if (err == error.FileNotFound or err == error.NotDir) break :examples_blk; - @panic("Can't open 'examples' directory"); - }; - defer examples_dir.close(); - var dir_iter = examples_dir.iterate(); - while (dir_iter.next() catch @panic("Failed to iterate examples")) |entry| { + if (b.build_root.handle.openDir(io, examples_path, .{ .iterate = true })) |examples_dir| { + var dir = examples_dir; + defer dir.close(io); + const run_all_examples = b.step("run-all", "Run all examples"); + var it = dir.iterate(); + while (it.next(io) catch null) |entry| { + if (entry.kind != .file) continue; if (!std.mem.endsWith(u8, entry.name, ".zig")) continue; - const exe_name = fs.path.stem(entry.name); + const exe_name = entry.name[0 .. entry.name.len - 4]; const exe_path = b.fmt("{s}/{s}", .{ examples_path, entry.name }); - const exe_module = b.createModule(.{ + const exe_module = b.addModule(exe_name, .{ .root_source_file = b.path(exe_path), .target = target, .optimize = optimize, @@ -64,23 +72,27 @@ pub fn build(b: *std.Build) void { const run_step_desc = b.fmt("Run the {s} example", .{exe_name}); const run_step = b.step(run_step_name, run_step_desc); run_step.dependOn(&run_cmd.step); + run_all_examples.dependOn(run_step); } + } else |err| switch (err) { + // Used as a library dependency: no examples directory at the import root. + error.FileNotFound, error.NotDir => {}, + else => @panic(@errorName(err)), } - // Build benchmarks + // --- Benchmarks --- const benches_path = "benches"; - benches_blk: { - var benches_dir = fs.cwd().openDir(benches_path, .{ .iterate = true }) catch |err| { - if (err == error.FileNotFound or err == error.NotDir) break :benches_blk; - @panic("Can't open 'benches' directory"); - }; - defer benches_dir.close(); - var dir_iter = benches_dir.iterate(); - while (dir_iter.next() catch @panic("Failed to iterate benches")) |entry| { + if (b.build_root.handle.openDir(io, benches_path, .{ .iterate = true })) |benches_dir| { + var dir = benches_dir; + defer dir.close(io); + const bench_all = b.step("bench-all", "Run all benchmarks"); + var it = dir.iterate(); + while (it.next(io) catch null) |entry| { + if (entry.kind != .file) continue; if (!std.mem.endsWith(u8, entry.name, ".zig")) continue; - const bench_name = fs.path.stem(entry.name); + const bench_name = entry.name[0 .. entry.name.len - 4]; const bench_path = b.fmt("{s}/{s}", .{ benches_path, entry.name }); - const bench_module = b.createModule(.{ + const bench_module = b.addModule(bench_name, .{ .root_source_file = b.path(bench_path), .target = target, .optimize = .ReleaseFast, // Use ReleaseFast for benchmarks @@ -96,6 +108,39 @@ pub fn build(b: *std.Build) void { const bench_step_desc = b.fmt("Run the {s} benchmark", .{bench_name}); const bench_step = b.step(bench_step_name, bench_step_desc); bench_step.dependOn(&run_bench_cmd.step); + bench_all.dependOn(bench_step); } + } else |err| switch (err) { + error.FileNotFound, error.NotDir => {}, + else => @panic(@errorName(err)), } } + +/// Build step that ensures a directory (relative to the build root) exists. +/// Runs `std.fs.Dir.createDirPath` at make-time, so it only fires when a +/// step that depends on it is actually being built. Portable across Linux, +/// macOS, and Windows. +const EnsureDirStep = struct { + step: std.Build.Step, + sub_path: []const u8, + + fn create(b: *std.Build, sub_path: []const u8) *EnsureDirStep { + const self = b.allocator.create(EnsureDirStep) catch @panic("OOM"); + self.* = .{ + .step = std.Build.Step.init(.{ + .id = .custom, + .name = b.fmt("ensure {s}/", .{sub_path}), + .owner = b, + .makeFn = make, + }), + .sub_path = sub_path, + }; + return self; + } + + fn make(step: *std.Build.Step, options: std.Build.Step.MakeOptions) anyerror!void { + _ = options; + const self: *EnsureDirStep = @fieldParentPtr("step", step); + try step.owner.build_root.handle.createDirPath(step.owner.graph.io, self.sub_path); + } +}; diff --git a/build.zig.zon b/build.zig.zon index 7157154..2d0e8e4 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,8 +1,8 @@ .{ .name = .ordered, - .version = "0.1.1", + .version = "0.2.0", .fingerprint = 0xc3121f99b0352e1b, // Changing this has security and trust implications. - .minimum_zig_version = "0.15.2", + .minimum_zig_version = "0.16.0", .paths = .{ "build.zig", "build.zig.zon", diff --git a/examples/e3_red_black_tree_set.zig b/examples/e3_red_black_tree_set.zig index 6fa68ee..9186890 100644 --- a/examples/e3_red_black_tree_set.zig +++ b/examples/e3_red_black_tree_set.zig @@ -1,19 +1,17 @@ const std = @import("std"); const ordered = @import("ordered"); -// Context object for comparison. This is needed by RedBlackTreeSet -const I32Context = struct { - // This function must be public to be visible from the library code. - pub fn lessThan(_: @This(), a: i32, b: i32) bool { - return a < b; - } -}; +// Comparison function for the keys. Returns a `std.math.Order` — the same +// three-way shape used by every other generic-key container in the library. +fn i32Compare(lhs: i32, rhs: i32) std.math.Order { + return std.math.order(lhs, rhs); +} pub fn main() !void { const allocator = std.heap.page_allocator; std.debug.print("## RedBlackTreeSet Example (as a Set) ##\n", .{}); - var rbt = ordered.RedBlackTreeSet(i32, I32Context).init(allocator, .{}); + var rbt = ordered.RedBlackTreeSet(i32, i32Compare).init(allocator); defer rbt.deinit(); try rbt.put(40); diff --git a/examples/e6_cartesian_tree_map.zig b/examples/e6_cartesian_tree_map.zig index 5cb971e..bf342f5 100644 --- a/examples/e6_cartesian_tree_map.zig +++ b/examples/e6_cartesian_tree_map.zig @@ -1,11 +1,15 @@ const std = @import("std"); const ordered = @import("ordered"); +fn i32Compare(lhs: i32, rhs: i32) std.math.Order { + return std.math.order(lhs, rhs); +} + pub fn main() !void { const allocator = std.heap.page_allocator; std.debug.print("## CartesianTreeMap Example ##\n", .{}); - var cartesian_tree = ordered.CartesianTreeMap(i32, []const u8).init(allocator); + var cartesian_tree = ordered.CartesianTreeMap(i32, []const u8, i32Compare).init(allocator); defer cartesian_tree.deinit(); try cartesian_tree.put(50, "fifty"); diff --git a/pyproject.toml b/pyproject.toml index ba681e2..bb32ed8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,14 +8,3 @@ dependencies = [ "python-dotenv (>=1.1.0,<2.0.0)", "pre-commit (>=4.2.0,<5.0.0)" ] - -[project.optional-dependencies] -dev = [ - "pytest (>=8.0.1,<9.0.0)", - "pytest-cov (>=6.0.0,<7.0.0)", - "pytest-mock (>=3.14.0,<4.0.0)", - "pytest-asyncio (>=0.26.0,<0.27.0)", - "mypy (>=1.11.1,<2.0.0)", - "ruff (>=0.9.3,<1.0.0)", - "icecream (>=2.1.4,<3.0.0)" -] diff --git a/src/ordered/btree_map.zig b/src/ordered/btree_map.zig index 69d7389..cf04c69 100644 --- a/src/ordered/btree_map.zig +++ b/src/ordered/btree_map.zig @@ -48,8 +48,20 @@ pub fn BTreeMap( comptime compare: fn (lhs: K, rhs: K) std.math.Order, comptime BRANCHING_FACTOR: u16, ) type { - std.debug.assert(BRANCHING_FACTOR >= 3); - const MIN_KEYS = (BRANCHING_FACTOR - 1) / 2; + comptime { + if (BRANCHING_FACTOR < 4) { + @compileError("BTreeMap: BRANCHING_FACTOR must be at least 4"); + } + } + // Minimum keys per non-root node. + // + // We pick this so that `2 * MIN_KEYS + 1` (the size of a merge of two + // MIN_KEYS siblings plus the pulled-down separator) always fits in the + // `BRANCHING_FACTOR - 1`-element key array. For even B this simplifies + // to `(B - 1) / 2` — identical to the common textbook formula. For odd + // B it relaxes MIN_KEYS by one compared to the textbook, which avoids + // an over-full node after merge at the cost of slightly looser packing. + const MIN_KEYS = (BRANCHING_FACTOR - 2) / 2; return struct { const Self = @This(); @@ -528,7 +540,12 @@ pub fn BTreeMap( }; fn init(allocator: std.mem.Allocator, root: ?*Node) !Iterator { - var stack = std.ArrayList(StackFrame){}; + var stack: std.ArrayList(StackFrame) = .empty; + // If any append below fails after the first, the local + // `stack` goes out of scope without being moved into the + // returned Iterator; its heap buffer would leak without + // this errdefer. + errdefer stack.deinit(allocator); if (root) |r| { try stack.append(allocator, StackFrame{ .node = r, .index = 0 }); @@ -783,3 +800,62 @@ test "BTreeMap: negative keys" { try std.testing.expectEqual(@as(i32, 10), map.get(-10).?.*); try std.testing.expectEqual(@as(i32, -5), map.get(5).?.*); } + +test "regression: BTreeMap deletion works with odd BRANCHING_FACTOR" { + // Bug B1: the old `MIN_KEYS = (B-1)/2` formula combined with the split + // layout left odd-B right siblings underfull; a subsequent deletion that + // merged two MIN_KEYS siblings plus a separator overflowed the + // `[B-1]K` keys array, panicking at merge time. This test exercises + // the exact path that used to crash for B=5. + const allocator = std.testing.allocator; + var map = BTreeMap(i32, i32, i32Compare, 5).init(allocator); + defer map.deinit(); + + // Fill, split, then fill again so both halves are at MIN_KEYS. + try map.put(0, 0); + try map.put(1, 1); + try map.put(2, 2); + try map.put(3, 3); + try map.put(4, 4); + try std.testing.expectEqual(@as(usize, 5), map.count()); + + // Delete from the right side: this forces the merge path. + try std.testing.expectEqual(@as(i32, 4), map.remove(4).?); + try std.testing.expectEqual(@as(usize, 4), map.count()); + + // Verify the remaining tree is intact and ordered. + try std.testing.expectEqual(@as(i32, 0), map.get(0).?.*); + try std.testing.expectEqual(@as(i32, 1), map.get(1).?.*); + try std.testing.expectEqual(@as(i32, 2), map.get(2).?.*); + try std.testing.expectEqual(@as(i32, 3), map.get(3).?.*); + try std.testing.expect(map.get(4) == null); +} + +test "regression: BTreeMap sequential delete with odd B stays valid" { + // Larger stress: insert 200 items with B=7 (odd, previously broken), + // delete half in random-ish order, then confirm the remainder is still + // accessible and ordered. + const allocator = std.testing.allocator; + var map = BTreeMap(i32, i32, i32Compare, 7).init(allocator); + defer map.deinit(); + + var i: i32 = 0; + while (i < 200) : (i += 1) { + try map.put(i, i * 2); + } + try std.testing.expectEqual(@as(usize, 200), map.count()); + + // Delete every other item in ascending order. + i = 0; + while (i < 200) : (i += 2) { + const v = map.remove(i); + try std.testing.expectEqual(@as(i32, i * 2), v.?); + } + try std.testing.expectEqual(@as(usize, 100), map.count()); + + // Remaining items must still be reachable with correct values. + i = 1; + while (i < 200) : (i += 2) { + try std.testing.expectEqual(@as(i32, i * 2), map.get(i).?.*); + } +} diff --git a/src/ordered/cartesian_tree_map.zig b/src/ordered/cartesian_tree_map.zig index 326d24b..d7e7568 100644 --- a/src/ordered/cartesian_tree_map.zig +++ b/src/ordered/cartesian_tree_map.zig @@ -36,7 +36,11 @@ const testing = std.testing; const Allocator = std.mem.Allocator; const Order = std.math.Order; -pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { +pub fn CartesianTreeMap( + comptime K: type, + comptime V: type, + comptime compare: fn (lhs: K, rhs: K) std.math.Order, +) type { return struct { const Self = @This(); @@ -59,14 +63,22 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { root: ?*Node = null, allocator: Allocator, len: usize = 0, + prng: std.Random.DefaultPrng, /// Creates a new empty Cartesian tree. /// /// ## Parameters /// - `allocator`: Memory allocator for node allocation pub fn init(allocator: Allocator) Self { + // Seed the per-instance PRNG from a stack-address-derived hash. + // ASLR makes this differ across process runs, which is enough + // randomness for treap priorities. Not a cryptographic RNG. + var anchor: u8 = 0; + const addr = @intFromPtr(&anchor); + const seed: u64 = @truncate(std.hash.Wyhash.hash(0, std.mem.asBytes(&addr))); return Self{ .allocator = allocator, + .prng = std.Random.DefaultPrng.init(seed), }; } @@ -97,7 +109,7 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { /// Inserts a key-value pair with a random priority. /// - /// Uses cryptographically random priorities to ensure expected O(log n) performance. + /// Uses per-instance PRNG priorities to ensure expected O(log n) performance. /// If the key already exists, updates its value and priority. /// /// Time complexity: O(log n) expected @@ -105,7 +117,7 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { /// ## Errors /// Returns `error.OutOfMemory` if node allocation fails. pub fn put(self: *Self, key: K, value: V) !void { - const priority = std.crypto.random.int(u32); + const priority = self.prng.random().int(u32); try self.putWithPriority(key, value, priority); } @@ -131,17 +143,17 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { return; } - self.root = try self.insertNode(self.root, new_node); + self.root = self.insertNode(self.root, new_node); } - fn insertNode(self: *Self, root: ?*Node, new_node: *Node) !?*Node { + fn insertNode(self: *Self, root: ?*Node, new_node: *Node) ?*Node { if (root == null) { self.len += 1; return new_node; } const node = root.?; - const key_cmp = std.math.order(new_node.key, node.key); + const key_cmp = compare(new_node.key, node.key); if (key_cmp == .eq) { // Replace existing value @@ -161,9 +173,9 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { } if (key_cmp == .lt) { - node.left = try self.insertNode(node.left, new_node); + node.left = self.insertNode(node.left, new_node); } else { - node.right = try self.insertNode(node.right, new_node); + node.right = self.insertNode(node.right, new_node); } return root; @@ -180,7 +192,7 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { } const node = root.?; - const key_cmp = std.math.order(key, node.key); + const key_cmp = compare(key, node.key); if (key_cmp == .lt) { const split_result = self.split(node.left, key); @@ -206,7 +218,7 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { if (root == null) return null; const node = root.?; - const key_cmp = std.math.order(key, node.key); + const key_cmp = compare(key, node.key); return switch (key_cmp) { .eq => node.value, @@ -229,7 +241,7 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { if (root == null) return null; const node = root.?; - const key_cmp = std.math.order(key, node.key); + const key_cmp = compare(key, node.key); return switch (key_cmp) { .eq => &node.value, @@ -256,7 +268,7 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { } const node = root.?; - const key_cmp = std.math.order(key, node.key); + const key_cmp = compare(key, node.key); if (key_cmp == .eq) { const value = node.value; @@ -315,9 +327,13 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { pub fn init(allocator: Allocator, root: ?*Node) !Iterator { var it = Iterator{ - .stack = std.ArrayList(*Node){}, + .stack = .empty, .allocator = allocator, }; + // `pushLeft` issues a sequence of `stack.append`s; without + // this errdefer, the heap buffer leaks if the second or + // later append hits OOM. + errdefer it.stack.deinit(allocator); try it.pushLeft(root); return it; } @@ -358,8 +374,12 @@ pub fn CartesianTreeMap(comptime K: type, comptime V: type) type { }; } +fn i32Compare(lhs: i32, rhs: i32) std.math.Order { + return std.math.order(lhs, rhs); +} + test "CartesianTreeMap basic operations" { - var tree = CartesianTreeMap(i32, []const u8).init(testing.allocator); + var tree = CartesianTreeMap(i32, []const u8, i32Compare).init(testing.allocator); defer tree.deinit(); // Test insertion and retrieval @@ -388,7 +408,7 @@ test "CartesianTreeMap basic operations" { } test "CartesianTreeMap: empty tree operations" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); try testing.expect(tree.isEmpty()); @@ -399,7 +419,7 @@ test "CartesianTreeMap: empty tree operations" { } test "CartesianTreeMap: single element" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); try tree.putWithPriority(42, 100, 50); @@ -413,7 +433,7 @@ test "CartesianTreeMap: single element" { } test "CartesianTreeMap: priority ordering" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); // Higher priority should be closer to root @@ -426,7 +446,7 @@ test "CartesianTreeMap: priority ordering" { } test "CartesianTreeMap: update existing key with different priority" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); try tree.putWithPriority(10, 100, 50); @@ -437,7 +457,7 @@ test "CartesianTreeMap: update existing key with different priority" { } test "CartesianTreeMap: random priorities with put" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); // Using put which generates random priorities @@ -452,7 +472,7 @@ test "CartesianTreeMap: random priorities with put" { } test "CartesianTreeMap: sequential keys" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); var i: i32 = 0; @@ -469,7 +489,7 @@ test "CartesianTreeMap: sequential keys" { } test "CartesianTreeMap: remove non-existent key" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); try tree.putWithPriority(10, 10, 10); @@ -481,7 +501,7 @@ test "CartesianTreeMap: remove non-existent key" { } test "CartesianTreeMap: remove all elements" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); try tree.putWithPriority(1, 1, 1); @@ -497,7 +517,7 @@ test "CartesianTreeMap: remove all elements" { } test "CartesianTreeMap: negative keys" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); try tree.putWithPriority(-10, 10, 100); @@ -510,7 +530,7 @@ test "CartesianTreeMap: negative keys" { } test "CartesianTreeMap: iterator traversal" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); try tree.putWithPriority(30, 30, 30); @@ -532,7 +552,7 @@ test "CartesianTreeMap: iterator traversal" { } test "CartesianTreeMap: large dataset" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); var i: i32 = 0; @@ -549,7 +569,7 @@ test "CartesianTreeMap: large dataset" { } test "CartesianTreeMap: same priorities different keys" { - var tree = CartesianTreeMap(i32, i32).init(testing.allocator); + var tree = CartesianTreeMap(i32, i32, i32Compare).init(testing.allocator); defer tree.deinit(); // When priorities are equal, BST property still maintained by key diff --git a/src/ordered/red_black_tree_set.zig b/src/ordered/red_black_tree_set.zig index 6f8a4b6..5bdcab1 100644 --- a/src/ordered/red_black_tree_set.zig +++ b/src/ordered/red_black_tree_set.zig @@ -35,22 +35,25 @@ const Allocator = std.mem.Allocator; const testing = std.testing; const assert = std.debug.assert; -/// Creates a Red-black tree type for the given data type and comparison context. +/// Creates a Red-black tree type for the given data type and key-comparison function. +/// +/// The `compare` function is the same three-way comparator used by every other +/// generic-key container in the library (`BTreeMap`, `SkipListMap`, `SortedSet`, +/// and `CartesianTreeMap`), so tree types compose uniformly. /// /// ## Parameters /// - `T`: The data type to store in the tree -/// - `Context`: A type providing a `lessThan(ctx, a, b) bool` method for comparison +/// - `compare`: Three-way comparison function returning `std.math.Order` /// /// ## Example /// ```zig -/// const Context = struct { -/// pub fn lessThan(_: @This(), a: i32, b: i32) bool { -/// return a < b; -/// } -/// }; -/// var tree = RedBlackTreeSet(i32, Context).init(allocator, .{}); +/// fn i32Order(a: i32, b: i32) std.math.Order { return std.math.order(a, b); } +/// var tree = RedBlackTreeSet(i32, i32Order).init(allocator); /// ``` -pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { +pub fn RedBlackTreeSet( + comptime T: type, + comptime compare: fn (lhs: T, rhs: T) std.math.Order, +) type { return struct { const Self = @This(); @@ -74,19 +77,16 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { root: ?*Node, allocator: Allocator, - context: Context, size: usize, /// Creates a new empty Red-black tree. /// /// ## Parameters /// - `allocator`: Memory allocator for node allocation - /// - `context`: Comparison context instance - pub fn init(allocator: Allocator, context: Context) Self { + pub fn init(allocator: Allocator) Self { return Self{ .root = null, .allocator = allocator, - .context = context, .size = 0, }; } @@ -124,7 +124,7 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { /// Inserts or updates a value in the tree. /// - /// If the value already exists (as determined by the context's lessThan method), + /// If the value already exists (as determined by the `compare` function), /// it will be updated. Otherwise, a new node is created. /// /// Time complexity: O(log n) @@ -133,7 +133,7 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { /// Returns `error.OutOfMemory` if node allocation fails. pub fn put(self: *Self, data: T) !void { // Check if key already exists first to avoid unnecessary allocation - if (self.get(data)) |existing| { + if (self.getNode(data)) |existing| { existing.data = data; return; } @@ -160,7 +160,7 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { while (current != null) { parent = current; - if (self.context.lessThan(data, current.?.data)) { + if (compare(data, current.?.data) == .lt) { current = current.?.left; } else { current = current.?.right; @@ -168,7 +168,7 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { } new_node.parent = parent; - if (self.context.lessThan(data, parent.?.data)) { + if (compare(data, parent.?.data) == .lt) { parent.?.left = new_node; } else { parent.?.right = new_node; @@ -241,7 +241,7 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { /// /// Time complexity: O(log n) pub fn remove(self: *Self, data: T) ?T { - const node = self.get(data) orelse return null; + const node = self.getNode(data) orelse return null; const value = node.data; self.removeNode(node); self.size -= 1; @@ -379,7 +379,13 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { } fn rotateLeft(self: *Self, node: *Node) void { - const right = node.right orelse return; + // Left rotation requires the node to have a right child; callers + // in `fixInsert` / `fixDelete` must preserve this precondition. + // Previously this silently bailed out with `orelse return`, which + // could mask a balancing bug by leaving the tree in a subtly + // wrong shape. + std.debug.assert(node.right != null); + const right = node.right.?; node.right = right.left; if (right.left) |left| left.parent = node; @@ -401,7 +407,11 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { } fn rotateRight(self: *Self, node: *Node) void { - const left = node.left orelse return; + // Right rotation requires the node to have a left child. See + // note in `rotateLeft` for why this is an assertion rather than + // a silent early return. + std.debug.assert(node.left != null); + const left = node.left.?; node.left = left.right; if (left.right) |right| right.parent = node; @@ -422,35 +432,37 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { node.parent = left; } - /// Returns a pointer to the node containing the data. + /// Returns an immutable pointer to the stored value that compares equal + /// to `data`, or `null` if no such value exists. /// - /// Returns `null` if the data is not found. The returned node pointer can be used - /// to access or modify the data directly. + /// Time complexity: O(log n) + pub fn get(self: *const Self, data: T) ?*const T { + const node = self.getNode(data) orelse return null; + return &node.data; + } + + /// Checks whether the tree contains the given value. /// /// Time complexity: O(log n) - pub fn get(self: *const Self, data: T) ?*Node { + pub fn contains(self: *const Self, data: T) bool { + return self.getNode(data) != null; + } + + /// Internal: returns the node pointer for mutation by `put` and `remove`. + fn getNode(self: *const Self, data: T) ?*Node { var current = self.root; while (current) |node| { - if (self.context.lessThan(data, node.data)) { - current = node.left; - } else if (self.context.lessThan(node.data, data)) { - current = node.right; - } else { - return node; + switch (compare(data, node.data)) { + .lt => current = node.left, + .gt => current = node.right, + .eq => return node, } } return null; } - /// Checks whether the tree contains the given value. - /// - /// Time complexity: O(log n) - pub fn contains(self: *const Self, data: T) bool { - return self.get(data) != null; - } - fn findMinimum(self: *const Self, node: *Node) *Node { _ = self; // Mark as intentionally unused var current = node; @@ -460,19 +472,25 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { return current; } - pub fn minimum(self: Self, node: ?*Node) ?*Node { - const start = node orelse self.root orelse return null; - return self.findMinimum(start); + /// Returns the smallest value in the tree, or `null` if the tree is empty. + /// + /// Time complexity: O(log n) + pub fn minimum(self: *const Self) ?T { + const start = self.root orelse return null; + return self.findMinimum(start).data; } - pub fn maximum(self: Self, node: ?*Node) ?*Node { - var current = node orelse self.root orelse return null; + /// Returns the largest value in the tree, or `null` if the tree is empty. + /// + /// Time complexity: O(log n) + pub fn maximum(self: *const Self) ?T { + var current = self.root orelse return null; while (current.right) |right| { current = right; } - return current; + return current.data; } /// Iterator for in-order traversal @@ -482,9 +500,13 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { pub fn init(allocator: Allocator, root: ?*Node) !Iterator { var it = Iterator{ - .stack = .{}, + .stack = .empty, .allocator = allocator, }; + // Seeding the stack with the leftmost path issues a loop of + // appends. Without this errdefer, an OOM on the second or + // later append drops `it` without freeing its heap buffer. + errdefer it.stack.deinit(allocator); // Initialize stack with leftmost path var node = root; @@ -500,7 +522,7 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { self.stack.deinit(self.allocator); } - pub fn next(self: *Iterator) !?*Node { + pub fn next(self: *Iterator) !?T { if (self.stack.items.len == 0) return null; const node: *Node = self.stack.pop().?; @@ -512,7 +534,7 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { current = n.left; } - return node; + return node.data; } }; @@ -522,24 +544,13 @@ pub fn RedBlackTreeSet(comptime T: type, comptime Context: type) type { }; } -/// Default context for comparable types -pub fn DefaultContext(comptime T: type) type { - return struct { - pub fn lessThan(self: @This(), a: T, b: T) bool { - _ = self; - return a < b; - } - }; -} - -// Convenience type aliases -pub fn RedBlackTreeSetManaged(comptime T: type) type { - return RedBlackTreeSet(T, DefaultContext(T)); +fn i32Compare(lhs: i32, rhs: i32) std.math.Order { + return std.math.order(lhs, rhs); } test "RedBlackTreeSet: basic operations" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(10); @@ -554,7 +565,7 @@ test "RedBlackTreeSet: basic operations" { test "RedBlackTreeSet: empty tree operations" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try std.testing.expect(!tree.contains(42)); @@ -564,7 +575,7 @@ test "RedBlackTreeSet: empty tree operations" { test "RedBlackTreeSet: single element" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(42); @@ -580,7 +591,7 @@ test "RedBlackTreeSet: single element" { test "RedBlackTreeSet: duplicate insertions" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(10); @@ -593,7 +604,7 @@ test "RedBlackTreeSet: duplicate insertions" { test "RedBlackTreeSet: sequential insertion" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 0; @@ -612,7 +623,7 @@ test "RedBlackTreeSet: sequential insertion" { test "RedBlackTreeSet: reverse insertion" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); var i: i32 = 50; @@ -626,7 +637,7 @@ test "RedBlackTreeSet: reverse insertion" { test "RedBlackTreeSet: remove from middle" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(10); @@ -645,7 +656,7 @@ test "RedBlackTreeSet: remove from middle" { test "RedBlackTreeSet: remove root" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(10); @@ -660,7 +671,7 @@ test "RedBlackTreeSet: remove root" { test "RedBlackTreeSet: minimum and maximum" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(10); @@ -669,18 +680,18 @@ test "RedBlackTreeSet: minimum and maximum" { try tree.put(3); try tree.put(20); - const min = tree.minimum(null); - const max = tree.maximum(null); + const min = tree.minimum(); + const max = tree.maximum(); try std.testing.expect(min != null); try std.testing.expect(max != null); - try std.testing.expectEqual(@as(i32, 3), min.?.data); - try std.testing.expectEqual(@as(i32, 20), max.?.data); + try std.testing.expectEqual(@as(i32, 3), min.?); + try std.testing.expectEqual(@as(i32, 20), max.?); } test "RedBlackTreeSet: iterator empty tree" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); var iter = try tree.iterator(); @@ -692,7 +703,7 @@ test "RedBlackTreeSet: iterator empty tree" { test "RedBlackTreeSet: clear" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(1); @@ -706,7 +717,7 @@ test "RedBlackTreeSet: clear" { test "RedBlackTreeSet: negative numbers" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(-10); @@ -719,15 +730,17 @@ test "RedBlackTreeSet: negative numbers" { try std.testing.expect(tree.contains(0)); } -test "RedBlackTreeSet: get returns correct node" { +test "RedBlackTreeSet: get returns correct value" { const allocator = std.testing.allocator; - var tree = RedBlackTreeSet(i32, DefaultContext(i32)).init(allocator, .{}); + var tree = RedBlackTreeSet(i32, i32Compare).init(allocator); defer tree.deinit(); try tree.put(10); try tree.put(20); - const node = tree.get(10); - try std.testing.expect(node != null); - try std.testing.expectEqual(@as(i32, 10), node.?.data); + const ptr = tree.get(10); + try std.testing.expect(ptr != null); + try std.testing.expectEqual(@as(i32, 10), ptr.?.*); + + try std.testing.expect(tree.get(99) == null); } diff --git a/src/ordered/skip_list_map.zig b/src/ordered/skip_list_map.zig index efdf8bc..4ea7e67 100644 --- a/src/ordered/skip_list_map.zig +++ b/src/ordered/skip_list_map.zig @@ -87,7 +87,13 @@ pub fn SkipListMap( header.forward = try allocator.alloc(?*Node, MAX_LEVEL); @memset(header.forward, null); - const rng = std.Random.DefaultPrng.init(@intCast(std.time.milliTimestamp())); + // Seed from a stack-address-derived hash. ASLR makes this differ + // across process runs, which is enough randomness for skip-list + // level selection. Not a cryptographic RNG. + var anchor: u8 = 0; + const addr = @intFromPtr(&anchor); + const seed: u64 = @truncate(std.hash.Wyhash.hash(0, std.mem.asBytes(&addr))); + const rng = std.Random.DefaultPrng.init(seed); return Self{ .header = header, diff --git a/src/ordered/sorted_set.zig b/src/ordered/sorted_set.zig index 3022289..b7f6081 100644 --- a/src/ordered/sorted_set.zig +++ b/src/ordered/sorted_set.zig @@ -29,7 +29,7 @@ pub fn SortedSet( pub fn init(allocator: std.mem.Allocator) Self { return .{ - .items = .{}, + .items = .empty, .allocator = allocator, }; } @@ -59,8 +59,11 @@ pub fn SortedSet( return true; } - /// Removes an element at a given index. - pub fn remove(self: *Self, index: usize) T { + /// Removes an element at a given index and returns it. + /// + /// Returns `null` if `index` is out of bounds. + pub fn remove(self: *Self, index: usize) ?T { + if (index >= self.items.items.len) return null; return self.items.orderedRemove(index); } @@ -68,7 +71,9 @@ pub fn SortedSet( /// Returns null if the value was not found. pub fn removeValue(self: *Self, value: T) ?T { const index = self.findIndex(value) orelse return null; - return self.remove(index); + // `remove` returns an optional for out-of-bounds safety, but the + // index came from `findIndex` which guarantees it is in range. + return self.remove(index).?; } /// Returns true if the set contains the given value. @@ -218,6 +223,22 @@ test "SortedSet: remove boundary cases" { try std.testing.expectEqualSlices(i32, &.{ 2, 4 }, set.items.items); } +test "regression: SortedSet remove returns null for out-of-bounds index" { + // Bug B9: `remove(index)` used to call `orderedRemove` directly, which + // panics on OOB. Now it returns null. + const allocator = std.testing.allocator; + var set = SortedSet(i32, i32Compare).init(allocator); + defer set.deinit(); + + try std.testing.expect(set.remove(0) == null); + try std.testing.expect(set.remove(99) == null); + + _ = try set.put(10); + try std.testing.expect(set.remove(1) == null); + try std.testing.expect(set.remove(0) != null); + try std.testing.expect(set.remove(0) == null); +} + test "SortedSet: removeValue method" { const allocator = std.testing.allocator; var set = SortedSet(i32, i32Compare).init(allocator); diff --git a/src/ordered/trie_map.zig b/src/ordered/trie_map.zig index e2d8727..2375137 100644 --- a/src/ordered/trie_map.zig +++ b/src/ordered/trie_map.zig @@ -121,7 +121,13 @@ pub fn TrieMap(comptime V: type) type { for (key) |char| { if (!current.children.contains(char)) { const new_node = try TrieNode.init(self.allocator); - try current.children.put(char, new_node); + // If the hashmap put fails (OOM), the freshly allocated + // `new_node` is not yet attached to any parent and would + // be orphaned; free it explicitly before propagating. + current.children.put(char, new_node) catch |err| { + new_node.deinit(self.allocator); + return err; + }; } current = current.children.get(char).?; } @@ -248,21 +254,26 @@ pub fn TrieMap(comptime V: type) type { const prefix_node = self.findNode(prefix); if (prefix_node == null) { return PrefixIterator{ - .stack = std.ArrayList(PrefixIteratorFrame){}, + .stack = .empty, .allocator = allocator, - .current_key = std.ArrayList(u8){}, + .current_key = .empty, .prefix_len = 0, }; } - var stack = std.ArrayList(PrefixIteratorFrame){}; + var stack: std.ArrayList(PrefixIteratorFrame) = .empty; + // On any error after this point, free the stack buffer; it is + // not yet owned by the returned PrefixIterator. + errdefer stack.deinit(allocator); try stack.append(allocator, PrefixIteratorFrame{ .node = prefix_node.?, .child_iter = prefix_node.?.children.iterator(), .visited_self = false, }); - var current_key = std.ArrayList(u8){}; + var current_key: std.ArrayList(u8) = .empty; + // Same rationale as `stack` above. + errdefer current_key.deinit(allocator); try current_key.appendSlice(allocator, prefix); return PrefixIterator{