diff --git a/.gitignore b/.gitignore
index f9082380e0..b479f86052 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+boxoban_maps_*.bin
+
 # Annoying temp files generated by Cython
 c_*.c
 pufferlib/extensions.c
@@ -162,3 +164,5 @@ pufferlib/ocean/impulse_wars/*-release/
 pufferlib/ocean/impulse_wars/debug-*/
 pufferlib/ocean/impulse_wars/release-*/
 pufferlib/ocean/impulse_wars/benchmark/
+
+*.dSYM/
diff --git a/pufferlib/config/ocean/boxoban.ini b/pufferlib/config/ocean/boxoban.ini
new file mode 100644
index 0000000000..f2e912d19c
--- /dev/null
+++ b/pufferlib/config/ocean/boxoban.ini
@@ -0,0 +1,57 @@
+[base]
+package = ocean
+env_name = puffer_boxoban
+policy_name = Policy
+rnn_name = Recurrent
+
+[vec]
+num_envs = 2
+
+[env]
+num_envs = 1024
+# difficulty id: 0 = basic, 1 = easy, 2 = medium, 3 = hard, 4 = unfiltered
+difficulty = 1
+# coefficient of the reward for the first time a box is pushed onto each target (at most once per target per episode)
+int_r_coeff = 0.25
+# penalty subtracted whenever a step lowers the number of boxes standing on targets
+target_loss_pen_coeff = 0.0
+
+[policy]
+
+
+[train]
+
+#EASY
+adam_beta1 = 0.8731132476489148
+adam_beta2 = 0.97965686417704
+adam_eps = 0.00000000008123794869
+anneal_lr = "true"
+batch_size = "auto"
+bptt_horizon = 64
+clip_coef = 0.01
+ent_coef = 0.01595981947421829
+gae_lambda = 0.6982154990440731
+gamma = 0.98663093763856
+learning_rate = 0.03199264297422195
+max_grad_norm = 0.5768091592872416
+max_minibatch_size = 32768
+min_lr_ratio = 0.37872027027338984
+minibatch_size = 8192
+optimizer = "muon"
+precision = "float32"
+prio_alpha = 0.99
+prio_beta0 = 0.930949266538068
+total_timesteps = 82565313
+update_epochs = 1
+use_rnn = true
+vf_clip_coef = 2.9197817585307435
+vf_coef = 4.787362674459031
+vtrace_c_clip = 4.90924508575585
+vtrace_rho_clip = 4.073806432722373
+
+
+[sweep.train.minibatch_size]
+distribution = uniform_pow2
+min = 4096
+max = 32768
+scale = auto
diff --git a/pufferlib/ocean/boxoban/binding.c b/pufferlib/ocean/boxoban/binding.c
new file mode 100644
index 0000000000..6d0749cc10
--- /dev/null
+++ b/pufferlib/ocean/boxoban/binding.c
@@ -0,0 +1,66 @@
+#define BOXOBAN_MAPS_IMPLEMENTATION //enables mmap
+#include "boxoban.h"
+#define Env Boxoban
+#include "../env_binding.h"
+
+// Reads the optional "difficulty" kwarg (int id or name string) into *out_difficulty_id;
+// a missing key means 0. Returns 0, or -1 with a Python exception set.
+static int parse_difficulty_id(PyObject* kwargs, int* out_difficulty_id) {
+    int difficulty_id = 0;
+    PyObject* difficulty_obj = PyDict_GetItemString(kwargs, "difficulty");
+    if (difficulty_obj != NULL) {
+        if (PyLong_Check(difficulty_obj)) {
+            long parsed_id = PyLong_AsLong(difficulty_obj);
+            if (parsed_id == -1 && PyErr_Occurred()) {
+                return -1; // conversion failed (e.g. OverflowError); keep that exception
+            }
+            // Check the narrowing first: a huge long must not wrap into a valid int id.
+            if ((long)(int)parsed_id != parsed_id || boxoban_difficulty_name_from_id((int)parsed_id) == NULL) {
+                PyErr_Format(PyExc_ValueError,
+                    "Boxoban 'difficulty' int must be in [0, 4], got %ld (0=basic, 1=easy, 2=medium, 3=hard, 4=unfiltered)",
+                    parsed_id);
+                return -1;
+            }
+            difficulty_id = (int)parsed_id;
+        } else if (PyUnicode_Check(difficulty_obj)) {
+            const char* difficulty_name = PyUnicode_AsUTF8(difficulty_obj);
+            if (difficulty_name == NULL) {
+                return -1;
+            }
+            difficulty_id = boxoban_difficulty_id_from_name(difficulty_name);
+            if (difficulty_id < 0) {
+                PyErr_Format(PyExc_ValueError,
+                    "Boxoban 'difficulty' string must be one of: basic, easy, medium, hard, unfiltered (got '%s')",
+                    difficulty_name);
+                return -1;
+            }
+        } else {
+            PyErr_SetString(PyExc_TypeError,
+                "Boxoban 'difficulty' must be an int (0..4) or string (basic/easy/medium/hard/unfiltered)");
+            return -1;
+        }
+    }
+    *out_difficulty_id = difficulty_id;
+    return 0;
+}
+
+// Binding hook: fills one Env from the vec_init kwargs, then runs init(). Returns 0, or -1 if "difficulty" is invalid.
+static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
+    const int difficulty_status = parse_difficulty_id(kwargs, &env->difficulty_id);
+    if (difficulty_status != 0) return -1; // Python exception was set by the parser
+    env->size = (int)unpack(kwargs, "size");
+    env->max_steps = (int)unpack(kwargs, "max_steps");
+    env->int_r_coeff = (float)unpack(kwargs, "int_r_coeff");
+    env->target_loss_pen_coeff = (float)unpack(kwargs, "target_loss_pen_coeff");
+    init(env);
+    return 0;
+}
+
+static int my_log(PyObject* dict, Log* log) { // binding hook: copies Log fields into the Python dict; always returns 0
+    assign_to_dict(dict, "perf", log->perf);
+    assign_to_dict(dict, "score", log->score);
+    assign_to_dict(dict, "episode_return", log->episode_return);
+    assign_to_dict(dict, "episode_length", log->episode_length);
+    assign_to_dict(dict, "targets_hit", log->on_targets); // Python-side key differs from the C field name
+    return 0;
+}
diff --git a/pufferlib/ocean/boxoban/boxoban.c b/pufferlib/ocean/boxoban/boxoban.c
new file mode 100644
index 0000000000..b6a9a1d21b
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban.c
@@ -0,0 +1,194 @@
+/* Pure C demo file for Boxoban. Usage:
+ *   bash scripts/build_ocean.sh boxoban
+ *   ./boxoban [difficulty|map_set_name|path/to/maps.bin]
+ *
+ * No argument selects "easy". A known difficulty name (basic, easy, medium, hard,
+ * unfiltered) has its maps prepared via boxoban_prepare_maps_for_difficulty(). An argument
+ * containing '/' is used as an explicit path to a bin file; any other word <name> is
+ * mapped to pufferlib/ocean/boxoban/boxoban_maps_<name>.bin. */
+
+#define BOXOBAN_MAPS_IMPLEMENTATION
+#include "boxoban.h"
+
+static int is_named_difficulty(const char* arg) { // 1 if arg is a built-in difficulty name, else 0
+    static const char* const known[] = {"basic", "easy", "medium", "hard", "unfiltered"};
+    for (size_t k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+        if (strcmp(arg, known[k]) == 0) return 1;
+    }
+    return 0;
+}
+
+// Picks the map .bin from argv[1]: none -> "easy"; contains '/' -> used verbatim; a known
+// difficulty -> prepared maps; any other word -> boxoban_maps_<word>.bin.
+// Returns argv[1] or buffer, or NULL if preparation fails or the path does not fit in buffer.
+static const char* resolve_map_path(int argc, char** argv, char* buffer, size_t buf_sz) {
+    const char* arg = argc > 1 ? argv[1] : "easy";
+    if (strchr(arg, '/')) {
+        return arg;
+    }
+    if (is_named_difficulty(arg)) {
+        if (boxoban_prepare_maps_for_difficulty(arg, buffer, buf_sz) != 0) {
+            return NULL;
+        }
+        return buffer;
+    }
+    int written = snprintf(buffer, buf_sz, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", arg);
+    if (written < 0 || (size_t)written >= buf_sz) {
+        return NULL; // a truncated path would silently name the wrong file
+    }
+    return buffer;
+}
+
+
+// Interactive raylib demo: hold Shift and use arrows/WASD to step manually (no direction = paused);
+// without Shift a random action is taken each frame. Returns 0 on window close, 1 on setup failure.
+int demo(int argc, char** argv) {
+    char path_buffer[512];
+    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
+    if (chosen_path == NULL) {
+        fprintf(stderr, "Failed to prepare map path\n");
+        return 1;
+    }
+    if (boxoban_set_map_path(chosen_path) != 0) {
+        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
+        return 1;
+    }
+
+    Boxoban env = {
+        .size = 10,
+        .observations = NULL,
+        .actions = NULL,
+        .rewards = NULL,
+        .terminals = NULL,
+        .max_steps = 500,
+        .int_r_coeff = 0.1f,
+        .target_loss_pen_coeff = 0.5f,
+        .tick = 0,
+        .agent_x = 0,
+        .agent_y = 0,
+        .intermediate_rewards = NULL,
+        .on_target = 0,
+        .n_boxes = 0,
+        .win = 0,
+        .difficulty_id = -1, // -1: init() does not select maps by difficulty; the path set above is used
+        .client = NULL,
+        .n_targets = 0,
+    };
+    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
+    env.observations = calloc(obs_count, sizeof(unsigned char));
+    env.actions = calloc(1, sizeof(int));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(unsigned char));
+    if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
+        fprintf(stderr, "Failed to allocate demo buffers\n");
+        free(env.observations);
+        free(env.actions);
+        free(env.rewards);
+        free(env.terminals);
+        return 1;
+    }
+
+    init(&env);
+    c_reset(&env);
+    c_render(&env);
+    while (!WindowShouldClose()) {
+        if (IsKeyPressed(KEY_LEFT_SHIFT) || IsKeyPressed(KEY_RIGHT_SHIFT)) {
+            TraceLog(LOG_INFO, "Shift key pressed");
+        }
+        bool manual = IsKeyDown(KEY_LEFT_SHIFT) || IsKeyDown(KEY_RIGHT_SHIFT);
+        if (manual) {
+            int new_action = -1; // stays -1 (paused) unless a direction is held; later checks win
+            if (IsKeyDown(KEY_UP)    || IsKeyDown(KEY_W)) new_action = UP;
+            if (IsKeyDown(KEY_DOWN)  || IsKeyDown(KEY_S)) new_action = DOWN;
+            if (IsKeyDown(KEY_LEFT)  || IsKeyDown(KEY_A)) new_action = LEFT;
+            if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) new_action = RIGHT;
+            if (new_action >= 0) {
+                env.actions[0] = new_action;
+                c_step(&env);
+            }
+        } else {
+            env.actions[0] = rand() % 5;
+            c_step(&env);
+        }
+        c_render(&env);
+    }
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+    return 0;
+}
+
+// Benchmark: steps one env with random actions for about `timeout` seconds and prints steps per second.
+void test_performance(int argc, char** argv, int timeout) {
+    char path_buffer[512];
+    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
+    if (chosen_path == NULL) {
+        fprintf(stderr, "Failed to prepare map path\n");
+        return;
+    }
+    if (boxoban_set_map_path(chosen_path) != 0) {
+        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
+        return;
+    }
+    printf("Loaded map: %s\n", chosen_path);
+
+    Boxoban env = {
+        .size = 10,
+        .observations = NULL,
+        .actions = NULL,
+        .rewards = NULL,
+        .terminals = NULL,
+        .max_steps = 500,
+        .int_r_coeff = 0.1f,
+        .target_loss_pen_coeff = 0.5f,
+        .tick = 0,
+        .agent_x = 0,
+        .agent_y = 0,
+        .intermediate_rewards = NULL,
+        .on_target = 0,
+        .n_boxes = 0,
+        .win = 0,
+        .difficulty_id = -1,
+        .client = NULL,
+        .n_targets = 0,
+    };
+
+    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
+    env.observations = calloc(obs_count, sizeof(unsigned char));
+    env.actions = calloc(1, sizeof(int));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(unsigned char));
+    printf("Initializing...\n");
+    init(&env);
+    printf("Resetting...\n");
+    c_reset(&env);
+    printf("Starting test...\n");
+
+    time_t start = time(NULL);
+    long num_steps = 0;
+    while (time(NULL) - start < timeout) {
+        env.actions[0] = rand() % 5;
+        c_step(&env);
+        num_steps++;
+    }
+
+    double elapsed = (double)(time(NULL) - start);
+    float sps = elapsed > 0.0 ? (float)(num_steps / elapsed) : 0.0f; // real division; elapsed is 0 when timeout <= 0
+    printf("Test Environment SPS: %f\n", sps);
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+}
+
+// Entry point: emits the startup diagnostics, then runs the demo and propagates its exit status.
+int main(int argc, char** argv) {
+    setbuf(stdout, NULL); // unbuffered stdout, configured before anything prints
+    fprintf(stderr, "Entered main\n");
+    fflush(stderr);
+    //test_performance(argc, argv,10);
+    return demo(argc, argv);
+}
diff --git a/pufferlib/ocean/boxoban/boxoban.h b/pufferlib/ocean/boxoban/boxoban.h
new file mode 100644
index 0000000000..6e483ba9e2
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban.h
@@ -0,0 +1,418 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include "raylib.h"
+#include "boxoban_maps.h"
+
+const unsigned char NOOP = 0;  // action ids, compared against env->actions[0] in take_action()
+const unsigned char DOWN = 1;  // y + 1
+const unsigned char UP = 2;    // y - 1
+const unsigned char LEFT = 3;  // x - 1
+const unsigned char RIGHT = 4; // x + 1
+
+const unsigned char AGENT = 0;  // observation plane indices used by get_entity()/set_entity()
+const unsigned char WALLS = 1;
+const unsigned char BOXES = 2;
+const unsigned char TARGET = 3; // c_reset() also copies this plane into intermediate_rewards
+
+// Required struct. Only use floats! Every field is a running sum that add_log() updates once per finished episode.
+typedef struct {
+    float perf; // Recommended 0-1 normalized single real number perf metric: +1 if solved, else on_target / n_boxes
+    float score; // Recommended unnormalized single real number perf metric: add_log() adds rewards[0] of the final step
+    float episode_return; // Recommended metric: sum of agent rewards over episode. NOTE(review): add_log() adds only the final step's reward
+    float episode_length; // Recommended metric: number of steps of agent episode (env->tick at episode end)
+    // Any extra fields you add here may be exported to Python in binding.c
+    float on_targets; // Number of targets boxed when the episode ended (exported as "targets_hit")
+    float n; // Required as the last field; incremented once per logged episode
+} Log;
+
+typedef struct { // raylib textures loaded by c_create()
+    Texture2D wall; // Wall_black.png
+    Texture2D box; // Crate_black.png
+    Texture2D target; // EndPoint_black.png
+    Texture2D floor; // GroundGravel_Concrete.png
+    Texture2D agent; // resources/shared/puffers_128.png; draw_tile() uses only its left half
+    Texture2D box_on_target; // EndPoint_Blue.png, drawn for a box standing on a target
+} Client;
+
+// Required that you have some struct for your env
+// Recommended that you name it the same as the env file
+typedef struct {
+    Log log; // Required field. Env binding code uses this to aggregate logs
+    unsigned char* observations; // Required. 4 planes (AGENT, WALLS, BOXES, TARGET) of size*size bytes; must match the Python obs space
+    int* actions; // Required. int* for discrete/multidiscrete, float* for box
+    float* rewards; // Required
+    unsigned char* terminals; // Required. Truncations are not standard yet, so reaching max_steps also sets this
+    int size; // board side length in cells
+    int tick; // steps taken in the current episode
+    int max_steps; // the episode ends with a -1 reward once tick reaches this
+    int agent_x;
+    int agent_y;
+    unsigned char* intermediate_rewards; // size*size grid allocated in init(); nonzero = that target's one-time reward is unclaimed
+    float int_r_coeff; // scales the intermediate reward returned by take_action()
+    float target_loss_pen_coeff; // subtracted from the reward when a step lowers on_target
+    int on_target; //num targets currently boxed
+    int n_boxes; //boxes in map
+    int n_targets; //targets in map
+    int difficulty_id; // 0=basic,1=easy,2=medium,3=hard,4=unfiltered; -1 = do not select maps by difficulty
+    Client* client; // render resources, created lazily by c_render()
+    int win; // set to 1 by c_step() when every target is covered; read by add_log()
+} Boxoban;
+
+void ensure_map_loaded(void);
+
+// Prepares the map set for env->difficulty_id. An id of -1 is accepted as "nothing to
+// prepare" and succeeds immediately. Returns 0 on success, -1 for an unknown id
+// (reported on stderr) or when map preparation fails.
+static int boxoban_configure_maps_from_env(Boxoban* env) {
+    const int id = env->difficulty_id;
+    if (id == -1) {
+        return 0;
+    }
+
+    // Ids below -1 are never looked up; they are reported exactly like unknown ids.
+    const char* name = (id < -1) ? NULL : boxoban_difficulty_name_from_id(id);
+    if (name == NULL) {
+        fprintf(stderr, "Invalid Boxoban difficulty id %d\n", id);
+        return -1;
+    }
+    char path_out[512]; // receives the prepared path; not used further here
+    const int prepared = boxoban_prepare_maps_for_difficulty(name, path_out, sizeof(path_out));
+    if (prepared != 0) {
+        return -1;
+    }
+    return 0;
+}
+
+// Cells are addressed as (entity plane, x, y), stored row-major within each plane; y increases from top to bottom.
+
+static inline void set_entity(Boxoban *env, int entity, int x, int y, unsigned char value) {
+    env->observations[(entity * env->size + y) * env->size + x] = value; // plane `entity`, row y, column x
+}
+
+static inline unsigned char get_entity(Boxoban *env, int entity, int x, int y) {
+    return *(env->observations + (entity * env->size + y) * env->size + x); // plane `entity`, row y, column x
+}
+
+static inline void set_intermediate_reward(Boxoban *env, int x, int y, unsigned char value) {
+    *(env->intermediate_rewards + y * env->size + x) = value; // row-major size*size grid
+}
+
+static inline unsigned char get_intermediate_reward_status(Boxoban *env, int x, int y) {
+    return *(env->intermediate_rewards + y * env->size + x); // row-major size*size grid
+}
+
+// Draws a puzzle index in [0, PUZZLE_COUNT) from rand(); env is not consulted.
+static inline const uint32_t get_random_puzzle_idx(const Boxoban *env) {
+    return (uint32_t)((size_t)rand() % PUZZLE_COUNT);
+}
+
+void init (Boxoban* env) { // selects maps for env->difficulty_id, allocates the size*size reward grid; aborts on failure
+    if (boxoban_configure_maps_from_env(env) != 0) {
+        fprintf(stderr, "Failed to configure Boxoban maps\n");
+        abort();
+    }
+    ensure_map_loaded();
+    env->intermediate_rewards = calloc((size_t)env->size * (size_t)env->size, sizeof(unsigned char));
+    if (env->intermediate_rewards == NULL) { perror("Boxoban init: calloc"); abort(); } // c_reset() memcpy()s into it
+    env->win = 0;
+}
+
+
+// Adds the finished episode to env->log (call before c_reset()). perf gets 1 for a solved puzzle, else on_target / n_boxes.
+void add_log(Boxoban* env) {
+    float boxed_frac = env->n_boxes > 0 ? (float)env->on_target / (float)env->n_boxes : 0.0f; // 0 when the map has no boxes
+    env->log.perf += (env->win == 1) ? 1.0f : boxed_frac;
+    env->log.score += env->rewards[0];
+    env->log.episode_length += env->tick;
+    env->log.episode_return += env->rewards[0]; // NOTE(review): terminal-step reward only, not the episode sum
+    env->log.on_targets += env->on_target;
+    env->log.n++;
+}
+
+
+// True iff (x, y) lies on the board and holds neither a wall nor a box.
+bool clear(Boxoban* env, int x, int y) {
+    const bool on_board = x >= 0 && y >= 0 && x < env->size && y < env->size;
+    if (!on_board) return false;
+    return !get_entity(env, WALLS, x, y) && !get_entity(env, BOXES, x, y);
+}
+
+// Required function
+// Loads a randomly chosen puzzle: copies its observation planes, reads the agent position and
+// box/target counts from the metadata bytes that follow them, and restarts the episode counters.
+void c_reset(Boxoban* env) {
+    const uint8_t* record = MAP_BASE + (size_t)get_random_puzzle_idx(env) * PUZZLE_SIZE;
+    const uint8_t* meta = record + PUZZLE_OBS_BYTES;
+    const int cells = env->size * env->size;
+
+    memcpy(env->observations, record, PUZZLE_OBS_BYTES);
+    env->agent_x = (int)meta[0];
+    env->agent_y = (int)meta[1];
+    env->n_boxes = (int)meta[2];
+    env->n_targets = (int)meta[3];
+    env->on_target = (int)meta[4];
+
+    // The TARGET plane doubles as the initial "reward still available" grid.
+    memcpy(env->intermediate_rewards, env->observations + TARGET * cells, cells);
+    env->win = 0;
+    env->tick = 0;
+}
+
+//Moves an entity flag in the observations: clears (x, y) first, then sets (x+dx, y+dy), so a zero offset leaves the flag set
+void move_entity(Boxoban* env,unsigned char entity,int x, int y, int dx, int dy) {
+    set_entity(env, entity, x, y, 0);
+    set_entity(env, entity, x + dx, y + dy, 1);
+}
+
+//Applies one action: moves the agent and pushes at most one box. Updates observations, agent position, on_target and the intermediate reward array in place
+int take_action(Boxoban* env, int action) {
+    //Returns the intermediate-reward flag collected by this move, or 0 if none was collected
+    int dx = 0;
+    int dy = 0;
+    int int_r = 0;
+
+    if (action == NOOP) {
+        return 0;
+    }
+    else if (action == DOWN) {
+        dy = 1;
+    }
+    else if (action == UP) {
+        dy = -1;
+    }
+    else if (action == LEFT) {
+        dx = -1;
+    }
+    else if (action == RIGHT) {
+        dx = 1;
+    }
+    //any other action value leaves dx = dy = 0
+    //if move space is clear, move agent
+    if (clear(env, env->agent_x + dx, env->agent_y + dy)) {
+
+        move_entity(env, AGENT, env->agent_x, env->agent_y, dx, dy);
+        env->agent_y += dy;
+        env->agent_x += dx;
+        return 0;
+    }
+    //if it is not clear, but it is a box and the cell behind the box is clear, move both
+    else if (clear(env, env->agent_x+ 2*dx, env->agent_y + 2*dy)
+            && get_entity(env, BOXES, env->agent_x + dx, env->agent_y + dy) == 1) {
+            //clear() is checked first: if the cell two steps away is on the board, so is the adjacent cell read above
+            //if box is on target currently, remove from on_target count
+            if (get_entity(env, TARGET, env->agent_x + dx, env->agent_y + dy) == 1) {
+
+                env->on_target -= 1;
+            }
+            //move both entities
+            move_entity(env, BOXES, env->agent_x + dx, env->agent_y + dy, dx, dy);
+            move_entity(env, AGENT, env->agent_x, env->agent_y, dx, dy);
+            env->agent_y += dy;
+            env->agent_x += dx;
+            //the agent has moved, so (agent_x + dx, agent_y + dy) is now the box's new cell
+            //if box is now on target, add to on_target count
+            //if it is a new target, receive the intermediate reward and zero it out so it is paid only once
+            if (get_entity(env, TARGET, env->agent_x + dx, env->agent_y + dy) == 1) {
+
+                env->on_target += 1;
+                int_r = get_intermediate_reward_status(env, env->agent_x + dx, env->agent_y + dy);
+                set_intermediate_reward(env, env->agent_x + dx, env->agent_y + dy, 0);
+            }
+            return int_r;
+    }
+    return 0; //blocked: nothing moved
+}
+
+// Required function
+// Advances the env by one tick using env->actions[0]. Reward = int_r_coeff times the
+// intermediate reward returned by take_action(), minus target_loss_pen_coeff if the boxed
+// count dropped, then +1 when every target is covered or -1 when max_steps is reached.
+// Either ending sets terminals[0], logs the episode and immediately resets to a new puzzle.
+void c_step(Boxoban* env) {
+    env->tick += 1;
+    env->terminals[0] = 0;
+    env->rewards[0] = 0.0;
+
+    const int boxed_before = env->on_target;
+    const int int_r = take_action(env, env->actions[0]); // modifies observations in place
+    const int boxed_after = env->on_target;
+
+    env->rewards[0] += (float)int_r * env->int_r_coeff; // coeff in .ini
+    if (boxed_after < boxed_before) { // target loss penalty
+        env->rewards[0] -= env->target_loss_pen_coeff; // coeff in .ini
+    }
+
+    //Terminals
+    const bool solved = env->on_target == env->n_targets;
+    const bool out_of_time = env->tick >= env->max_steps;
+    if (!solved && !out_of_time) {
+        return;
+    }
+
+    // Solving takes precedence when both conditions hold on the same tick.
+    env->terminals[0] = 1;
+    if (solved) {
+        env->rewards[0] += 1.0;
+        env->win = 1;
+    } else {
+        env->rewards[0] -= 1.0;
+    }
+    add_log(env);
+    c_reset(env);
+}
+
+/*Rendering stuff*/
+
+Client* c_create(Boxoban* env) { // allocates the Client, loads all sprites, stores it on env and returns it
+    Client* client = calloc(1,sizeof(Client));
+    const char *sprite_search_paths[] = { // candidate sprite directories, tried in order
+        "sprites_pack/PNG",
+        "pufferlib/ocean/boxoban/sprites_pack/PNG",
+        "../pufferlib/ocean/boxoban/sprites_pack/PNG",
+    };
+    const char *sprite_base = NULL;
+    for (unsigned i = 0; i < sizeof(sprite_search_paths)/sizeof(sprite_search_paths[0]); i++) {
+        if (DirectoryExists(sprite_search_paths[i])) {
+            sprite_base = sprite_search_paths[i]; // first existing directory wins
+            break;
+        }
+    }
+    if (sprite_base == NULL) {
+        TraceLog(LOG_WARNING, "Boxoban sprites not found next to executable, using default relative path");
+        sprite_base = "sprites_pack/PNG";
+    }
+    // resource_path is reused as scratch space for each file name below
+    char resource_path[256] = {0};
+
+    snprintf(resource_path, sizeof(resource_path), "%s/Wall_black.png", sprite_base);
+    client->wall = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/Crate_black.png", sprite_base);
+    client->box = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/EndPoint_black.png", sprite_base);
+    client->target = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/GroundGravel_Concrete.png", sprite_base);
+    client->floor = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/EndPoint_Blue.png", sprite_base);
+    client->box_on_target = LoadTexture(resource_path);
+    client->agent = LoadTexture("resources/shared/puffers_128.png"); // fixed path, not under sprite_base
+
+    env-> client = client;
+    return client;
+}
+
+#define TILE 32
+
+// Returns the one sprite that best represents cell (x, y), by priority:
+// wall, box on target, box, agent, target, floor.
+Texture2D choose_sprite(Client *c, Boxoban *env, int x, int y) {
+    const bool boxed = get_entity(env, BOXES, x, y) != 0;
+    const bool marked = get_entity(env, TARGET, x, y) != 0;
+
+    if (get_entity(env, WALLS, x, y)) return c->wall;
+    if (boxed) {
+        return marked ? c->box_on_target : c->box;
+    }
+    if (get_entity(env, AGENT, x, y)) return c->agent;
+
+    return marked ? c->target : c->floor;
+}
+
+void draw_tile(Boxoban *env, int x, int y) { // draws cell (x, y) as stacked layers: floor, target, box, wall, agent
+      Client *c = env->client;
+      Rectangle dest = {x * TILE, y * TILE, TILE, TILE}; // on-screen square of this cell, in pixels
+
+      // Always lay down the base tile
+      DrawTexturePro(
+          c->floor,
+          (Rectangle){0, 0, (float)c->floor.width, (float)c->floor.height},
+          dest,
+          (Vector2){0, 0},
+          0.0f,
+          WHITE);
+
+      if (get_entity(env, TARGET, x, y)) { // target marker goes under any box
+          DrawTexturePro(
+              c->target,
+              (Rectangle){0, 0, (float)c->target.width, (float)c->target.height},
+              dest,
+              (Vector2){0, 0},
+              0.0f,
+              WHITE);
+      }
+      if (get_entity(env, BOXES, x, y)) {
+          Texture2D tex = get_entity(env, TARGET, x, y) ? c->box_on_target : c->box; // distinct sprite for a box on a target
+          DrawTexturePro(
+              tex,
+              (Rectangle){0, 0, (float)tex.width, (float)tex.height},
+              dest,
+              (Vector2){0, 0},
+              0.0f,
+              WHITE);
+      }
+      if (get_entity(env, WALLS, x, y)) {
+          DrawTexturePro(
+              c->wall,
+              (Rectangle){0, 0, (float)c->wall.width, (float)c->wall.height},
+              dest,
+              (Vector2){0, 0},
+              0.0f,
+              WHITE);
+      }
+      if (get_entity(env, AGENT, x, y)) {
+          Rectangle src = {0, 0, c->agent.width / 2.0f, (float)c->agent.height}; // left half of the agent texture only
+          DrawTexturePro(c->agent, src, dest, (Vector2){0, 0}, 0.0f, WHITE);
+      }
+  }
+
+
+// Required function. Should handle creating the client on first call
+// Opens the window (10 FPS target) and loads sprites lazily, then redraws every cell.
+void c_render(Boxoban* env) {
+    const int side_px = TILE * env->size;
+    if (!IsWindowReady()) {
+        InitWindow(side_px, side_px, "PufferLib Boxoban");
+        SetTargetFPS(10);
+    }
+
+    // Standard across our envs so exiting is always the same
+    if (IsKeyDown(KEY_ESCAPE)) {
+        exit(0);
+    }
+
+    // c_create() also stores the client on env, so this runs once per env
+    if (!env->client) {
+        env->client = c_create(env);
+    }
+
+    BeginDrawing();
+    ClearBackground((Color){6, 24, 24, 255});
+    // Row-major sweep: cell k sits at column k % size, row k / size
+    const int cell_count = env->size * env->size;
+    for (int k = 0; k < cell_count; k++) {
+        draw_tile(env, k % env->size, k / env->size);
+    }
+    EndDrawing();
+}
+
+// Required function. Should clean up anything you allocated
+// Do not free env->observations, actions, rewards, terminals
+// Frees the reward grid; if a window is open, also unloads every client texture, frees the client and closes the window.
+void c_close(Boxoban* env) {
+    free(env->intermediate_rewards); // free(NULL) is a no-op, so no guard is needed
+    env->intermediate_rewards = NULL;
+    if (IsWindowReady()) {
+        if (env->client) {
+            UnloadTexture(env->client->wall);
+            UnloadTexture(env->client->box);
+            UnloadTexture(env->client->target);
+            UnloadTexture(env->client->floor);
+            UnloadTexture(env->client->agent);
+            UnloadTexture(env->client->box_on_target); // loaded in c_create() but previously never released
+            free(env->client);
+            env->client = NULL;
+        }
+        CloseWindow();
+    }
+}
diff --git a/pufferlib/ocean/boxoban/boxoban.py b/pufferlib/ocean/boxoban/boxoban.py
new file mode 100644
index 0000000000..1c1ff460c0
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban.py
@@ -0,0 +1,71 @@
+import gymnasium
+import numpy as np
+
+import pufferlib
+from pufferlib.ocean.boxoban import binding
+
+class Boxoban(pufferlib.PufferEnv):
+    """Vectorized Boxoban env backed by the C binding; each of the num_envs C envs is one agent."""
+    def __init__(self, num_envs=1, render_mode=None, log_interval=128, size=10, buf=None, seed=0, difficulty=0, max_steps = 500,int_r_coeff = 0.1, target_loss_pen_coeff = 0.5):
+        self.shape = size*size*4 #agents walls boxes targets OHE
+        self.difficulty = difficulty
+        self._difficulty_stat_key = f"difficulty ({self.difficulty})"
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=1,
+            shape=(self.shape,), dtype=np.uint8)
+        self.single_action_space = gymnasium.spaces.Discrete(5)
+        self.render_mode = render_mode
+        self.num_agents = num_envs
+        self.log_interval = log_interval
+        self.max_steps = max_steps
+        self.int_r_coeff = int_r_coeff
+        self.target_loss_pen_coeff = target_loss_pen_coeff
+        self.tick = 0 # defined up front so step() works even if reset() was never called
+
+        super().__init__(buf)
+        self.c_envs = binding.vec_init(self.observations, self.actions, self.rewards,
+            self.terminals, self.truncations, num_envs, seed, size=size, max_steps = self.max_steps, int_r_coeff = self.int_r_coeff, target_loss_pen_coeff = self.target_loss_pen_coeff, difficulty=self.difficulty)
+
+    def reset(self, seed=0):
+        binding.vec_reset(self.c_envs, seed)
+        self.tick = 0
+        return self.observations, []
+
+    def step(self, actions):
+        self.tick += 1
+        self.actions[:] = actions
+        binding.vec_step(self.c_envs)
+        info = []
+        if self.tick % self.log_interval == 0: # fetch the C-side log every log_interval steps
+            log_dict = binding.vec_log(self.c_envs)
+            log_dict[self._difficulty_stat_key] = 1.0 # tags the log entry with the configured difficulty
+            info.append(log_dict)
+
+        return (self.observations, self.rewards,
+            self.terminals, self.truncations, info)
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
+
+if __name__ == '__main__':
+    # Quick throughput check: step N envs with pre-sampled random actions for 10 seconds
+    import time
+    N = 1
+    CACHE = 1024
+
+    env = Boxoban(num_envs=N)
+    env.reset()
+    env.render()
+    actions = np.random.randint(0, 5, (CACHE, N))
+
+    steps = 0
+    start = time.time()
+    while time.time() - start < 10:
+        env.step(actions[(steps // N) % CACHE])
+        steps += N
+    elapsed = time.time() - start
+
+    print('Boxoban SPS:', int(steps / elapsed))
+    env.close() # release the C envs once the measurement is done
diff --git a/pufferlib/ocean/boxoban/boxoban_maps.h b/pufferlib/ocean/boxoban/boxoban_maps.h
new file mode 100644
index 0000000000..437043d454
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban_maps.h
@@ -0,0 +1,453 @@
+#ifndef PUFFERLIB_OCEAN_BOXOBAN_MAPS_H
+#define PUFFERLIB_OCEAN_BOXOBAN_MAPS_H
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "generate_maps.h"
+#include "parse_maps.h"
+
+/*
+Maps are stored in binary files keyed by difficulty.
+If the bin does not exist it is created on the fly, then mmapped and shared by envs.
+*/
+
+extern uint8_t *MAP_BASE;
+extern size_t MAP_FILESIZE;
+extern size_t PUZZLE_COUNT;
+extern size_t PUZZLE_SIZE;
+extern size_t PUZZLE_OBS_BYTES;
+
+int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap);
+int boxoban_set_map_path(const char *path);
+int boxoban_difficulty_id_from_name(const char* difficulty_name);
+const char* boxoban_difficulty_name_from_id(int difficulty_id);
+void ensure_map_loaded(void);
+
+#ifdef BOXOBAN_MAPS_IMPLEMENTATION
+
+uint8_t *MAP_BASE = NULL;
+size_t MAP_FILESIZE = 0;
+size_t PUZZLE_COUNT = 0;
+size_t PUZZLE_SIZE = BOXOBAN_PUZZLE_BYTES;
+size_t PUZZLE_OBS_BYTES = BOXOBAN_PUZZLE_OBS_BYTES;
+static char* BOXOBAN_MAP_PATH = NULL;
+static const char* BOXOBAN_LEVEL_ROOT = "pufferlib/ocean/boxoban/boxoban-levels";
+
+/* Growable array of heap-allocated path strings (see boxoban_path_list_append/free). */
+typedef struct {
+    char** items;  /* owned strings; each one is freed by boxoban_path_list_free */
+    size_t count;  /* number of entries currently stored */
+    size_t cap;    /* allocated capacity of items, in entries */
+} BoxobanPathList;
+
+/* qsort comparator for an array of C strings: orders the elements by strcmp. */
+static int boxoban_cmp_strings(const void* a, const void* b) {
+    const char* lhs = *(const char* const*)a;
+    const char* rhs = *(const char* const*)b;
+    return strcmp(lhs, rhs);
+}
+
+/* Frees every stored string and the item array, then leaves the list empty and reusable. */
+static void boxoban_path_list_free(BoxobanPathList* list) {
+    size_t idx = 0;
+    while (idx < list->count) {
+        free(list->items[idx]);
+        idx++;
+    }
+    free(list->items);
+    *list = (BoxobanPathList){0};
+}
+
+/*
+ * Appends a heap-allocated copy of `path` to the list. When the list is full
+ * its capacity is doubled (the first allocation holds 64 entries).
+ * Returns 0 on success and -1 if an allocation fails; entries already stored
+ * stay valid in that case.
+ */
+static int boxoban_path_list_append(BoxobanPathList* list, const char* path) {
+    if (list->count == list->cap) {
+        size_t grown_cap = 64;
+        if (list->cap != 0) {
+            grown_cap = list->cap * 2;
+        }
+        char** grown = (char**)realloc(list->items, grown_cap * sizeof(char*));
+        if (grown == NULL) {
+            return -1;
+        }
+        list->items = grown;
+        list->cap = grown_cap;
+    }
+
+    size_t nbytes = strlen(path) + 1;
+    char* dup = (char*)malloc(nbytes);
+    if (dup == NULL) {
+        return -1;
+    }
+    memcpy(dup, path, nbytes);
+    list->items[list->count] = dup;
+    list->count += 1;
+    return 0;
+}
+
+/* Returns 1 when `name` ends with ".txt" (case-sensitive), otherwise 0. */
+static int boxoban_has_txt_suffix(const char* name) {
+    static const char suffix[] = ".txt";
+    const size_t suffix_len = sizeof(suffix) - 1;
+    size_t name_len = strlen(name);
+    if (name_len < suffix_len) {
+        return 0;
+    }
+    return memcmp(name + (name_len - suffix_len), suffix, suffix_len) == 0;
+}
+
+/*
+ * Maps a difficulty name to its numeric id:
+ * "basic" -> 0, "easy" -> 1, "medium" -> 2, "hard" -> 3, "unfiltered" -> 4.
+ * Returns -1 for NULL or any other string (the match is case-sensitive).
+ */
+int boxoban_difficulty_id_from_name(const char* difficulty_name) {
+    static const char* const names[] = {"basic", "easy", "medium", "hard", "unfiltered"};
+    const int name_count = (int)(sizeof(names) / sizeof(names[0]));
+
+    if (difficulty_name == NULL) {
+        return -1;
+    }
+    for (int id = 0; id < name_count; id++) {
+        if (strcmp(difficulty_name, names[id]) == 0) {
+            return id;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Inverse of boxoban_difficulty_id_from_name: returns the name for ids 0..4
+ * ("basic", "easy", "medium", "hard", "unfiltered") and NULL for anything else.
+ * The returned string has static storage and must not be freed.
+ */
+const char* boxoban_difficulty_name_from_id(int difficulty_id) {
+    static const char* const names[] = {"basic", "easy", "medium", "hard", "unfiltered"};
+    const int name_count = (int)(sizeof(names) / sizeof(names[0]));
+
+    if (difficulty_id < 0 || difficulty_id >= name_count) {
+        return NULL;
+    }
+    return names[difficulty_id];
+}
+
+/*
+ * Returns 1 if `dir_path` can be opened and contains at least one entry whose
+ * name ends in ".txt"; returns 0 otherwise (including when it cannot be opened).
+ */
+static int boxoban_dir_has_txt(const char* dir_path) {
+    DIR* handle = opendir(dir_path);
+    if (handle == NULL) {
+        return 0;
+    }
+
+    int found = 0;
+    for (struct dirent* entry = readdir(handle); entry != NULL; entry = readdir(handle)) {
+        if (boxoban_has_txt_suffix(entry->d_name)) {
+            found = 1;
+            break;
+        }
+    }
+    closedir(handle);
+    return found;
+}
+
+/*
+ * Appends the full path of every "*.txt" entry directly inside `dir_path` to
+ * `out_paths`, ordered by file name (strcmp order).
+ *
+ * Returns 0 on success. Returns -1 if the directory cannot be opened, an
+ * allocation fails, or a joined path does not fit the path buffer. On failure
+ * `out_paths` may already hold some of the entries; it stays owned by the
+ * caller, who is expected to free it.
+ */
+static int boxoban_collect_sorted_txt_paths_in_dir(const char* dir_path, BoxobanPathList* out_paths) {
+    DIR* dir = opendir(dir_path);
+    if (dir == NULL) {
+        fprintf(stderr, "Missing level directory %s\n", dir_path);
+        return -1;
+    }
+
+    BoxobanPathList names = {0};
+    int rc = 0;
+    struct dirent* ent;
+    while ((ent = readdir(dir)) != NULL) {
+        if (!boxoban_has_txt_suffix(ent->d_name)) {
+            continue;
+        }
+        if (boxoban_path_list_append(&names, ent->d_name) != 0) {
+            rc = -1;
+            break;
+        }
+    }
+    closedir(dir);
+
+    /* names.items is NULL when nothing matched; do not hand that to qsort. */
+    if (rc == 0 && names.count > 0) {
+        qsort(names.items, names.count, sizeof(char*), boxoban_cmp_strings);
+    }
+
+    for (size_t i = 0; rc == 0 && i < names.count; i++) {
+        char full_path[1400];
+        int written = snprintf(full_path, sizeof(full_path), "%s/%s", dir_path, names.items[i]);
+        if (written < 0 || (size_t)written >= sizeof(full_path)) {
+            /* A truncated path would name a different (or missing) file. */
+            fprintf(stderr, "Level path too long: %s/%s\n", dir_path, names.items[i]);
+            rc = -1;
+        } else if (boxoban_path_list_append(out_paths, full_path) != 0) {
+            rc = -1;
+        }
+    }
+
+    boxoban_path_list_free(&names);
+    return rc;
+}
+
+/*
+ * Collects the sorted "*.txt" level files found under
+ * BOXOBAN_LEVEL_ROOT/<rel_path> into `out_paths`.
+ * Returns -1 (after logging) when that path is not an existing directory,
+ * otherwise the result of boxoban_collect_sorted_txt_paths_in_dir.
+ */
+static int boxoban_collect_maps_from_dir(const char* rel_path, BoxobanPathList* out_paths) {
+    char dir_buf[1400];
+    snprintf(dir_buf, sizeof(dir_buf), "%s/%s", BOXOBAN_LEVEL_ROOT, rel_path);
+
+    struct stat info;
+    int is_dir = (stat(dir_buf, &info) == 0) && S_ISDIR(info.st_mode);
+    if (!is_dir) {
+        fprintf(stderr, "Missing level directory %s\n", dir_buf);
+        return -1;
+    }
+
+    return boxoban_collect_sorted_txt_paths_in_dir(dir_buf, out_paths);
+}
+
+/*
+ * Collects the level text files for a named difficulty. Each difficulty maps
+ * to a directory relative to BOXOBAN_LEVEL_ROOT; "hard" has no train/ subdirectory.
+ * Returns -1 (after logging) for an unknown difficulty name.
+ */
+static int boxoban_collect_maps(const char* difficulty, BoxobanPathList* out_paths) {
+    static const struct {
+        const char* name;
+        const char* rel_dir;
+    } level_dirs[] = {
+        {"basic", "basic/train"},
+        {"easy", "easy/train"},
+        {"medium", "medium/train"},
+        {"hard", "hard"},
+        {"unfiltered", "unfiltered/train"},
+    };
+
+    for (size_t i = 0; i < sizeof(level_dirs) / sizeof(level_dirs[0]); i++) {
+        if (strcmp(difficulty, level_dirs[i].name) == 0) {
+            return boxoban_collect_maps_from_dir(level_dirs[i].rel_dir, out_paths);
+        }
+    }
+
+    fprintf(stderr, "Invalid difficulty '%s'\n", difficulty);
+    return -1;
+}
+
+/*
+ * Downloads `<difficulty>.zip` from the Boxoban levels repository with curl,
+ * unzips it into a fresh temporary directory, locates the extracted
+ * `<difficulty>` directory and copies its contents into
+ * BOXOBAN_LEVEL_ROOT/<difficulty>.
+ *
+ * `difficulty` is interpolated into single-quoted shell commands, so any name
+ * containing characters outside [A-Za-z0-9_-] is rejected up front. Once the
+ * temporary directory exists it is removed again on every exit path.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int boxoban_download_text_maps(const char* difficulty) {
+    static const char allowed[] =
+        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-";
+    if (difficulty == NULL || difficulty[0] == '\0' ||
+        difficulty[strspn(difficulty, allowed)] != '\0') {
+        fprintf(stderr, "Refusing to download maps for invalid difficulty name\n");
+        return -1;
+    }
+
+    char zip_url[512];
+    int written = snprintf(zip_url, sizeof(zip_url),
+        "https://raw.githubusercontent.com/TBBristol/pufferlib_boxoban_levels/main/%s.zip",
+        difficulty);
+    if (written < 0 || (size_t)written >= sizeof(zip_url)) {
+        return -1;
+    }
+    fprintf(stdout, "[Boxoban] Downloading %s maps from %s\n", difficulty, zip_url);
+
+    char tmp_template[] = "/tmp/boxoban_maps_XXXXXX";
+    char* tmp_dir = mkdtemp(tmp_template);
+    if (tmp_dir == NULL) {
+        return -1;
+    }
+
+    int rc = -1;
+    FILE* find_pipe = NULL;
+    char* found = NULL;
+    char cmd[4096];
+    char zip_path[1400];
+    char extracted_root[1400] = {0};
+    char dest_root[1400];
+
+    /* difficulty fits in zip_url (512 bytes), so the buffers below cannot truncate. */
+    snprintf(zip_path, sizeof(zip_path), "%s/%s.zip", tmp_dir, difficulty);
+
+    snprintf(cmd, sizeof(cmd), "curl -L --fail -o '%s' '%s' > /dev/null 2>&1", zip_path, zip_url);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Failed to download Boxoban maps with curl\n");
+        goto cleanup;
+    }
+
+    snprintf(cmd, sizeof(cmd), "unzip -q '%s' -d '%s'", zip_path, tmp_dir);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Failed to unzip Boxoban maps archive\n");
+        goto cleanup;
+    }
+
+    snprintf(cmd, sizeof(cmd), "find '%s' -type d -name '%s' | head -n 1", tmp_dir, difficulty);
+    find_pipe = popen(cmd, "r");
+    if (find_pipe == NULL) {
+        goto cleanup;
+    }
+    found = fgets(extracted_root, sizeof(extracted_root), find_pipe);
+    pclose(find_pipe);
+    if (found == NULL) {
+        fprintf(stderr, "Downloaded zip missing '%s' directory\n", difficulty);
+        goto cleanup;
+    }
+    extracted_root[strcspn(extracted_root, "\r\n")] = '\0';
+    /* The path comes from archive contents; a quote would break out of the quoting below. */
+    if (strchr(extracted_root, '\'') != NULL) {
+        fprintf(stderr, "Downloaded zip contains an unsupported path\n");
+        goto cleanup;
+    }
+
+    snprintf(dest_root, sizeof(dest_root), "%s/%s", BOXOBAN_LEVEL_ROOT, difficulty);
+    if (boxoban_mkdir_p(dest_root) != 0) {
+        goto cleanup;
+    }
+
+    snprintf(cmd, sizeof(cmd), "cp -R '%s/.' '%s/'", extracted_root, dest_root);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Failed to copy downloaded maps into %s\n", dest_root);
+        goto cleanup;
+    }
+    rc = 0;
+
+cleanup:
+    /* tmp_dir is the mkdtemp result, so it contains no shell metacharacters. */
+    snprintf(cmd, sizeof(cmd), "rm -rf '%s'", tmp_dir);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Could not remove temporary directory %s\n", tmp_dir);
+    }
+    return rc;
+}
+
+/*
+ * Makes sure the level text files for `difficulty` exist on disk.
+ *
+ * If the difficulty's level directory already holds a ".txt" file nothing is
+ * done. Otherwise "basic" and "easy" maps are generated locally (seed 0) and
+ * every other difficulty is downloaded. Unknown names go straight to the
+ * download path. Returns 0 on success, non-zero on failure.
+ */
+static int boxoban_ensure_text_maps(const char* difficulty) {
+    char level_dir[1400];
+    const int is_basic = strcmp(difficulty, "basic") == 0;
+    const int is_easy = !is_basic && strcmp(difficulty, "easy") == 0;
+
+    if (is_basic) {
+        snprintf(level_dir, sizeof(level_dir), "%s/basic/train", BOXOBAN_LEVEL_ROOT);
+    } else if (is_easy) {
+        snprintf(level_dir, sizeof(level_dir), "%s/easy/train", BOXOBAN_LEVEL_ROOT);
+    } else if (strcmp(difficulty, "medium") == 0) {
+        snprintf(level_dir, sizeof(level_dir), "%s/medium/train", BOXOBAN_LEVEL_ROOT);
+    } else if (strcmp(difficulty, "hard") == 0) {
+        snprintf(level_dir, sizeof(level_dir), "%s/hard", BOXOBAN_LEVEL_ROOT);
+    } else if (strcmp(difficulty, "unfiltered") == 0) {
+        snprintf(level_dir, sizeof(level_dir), "%s/unfiltered/train", BOXOBAN_LEVEL_ROOT);
+    } else {
+        /* No known local directory to probe for this name. */
+        return boxoban_download_text_maps(difficulty);
+    }
+
+    if (boxoban_dir_has_txt(level_dir)) {
+        return 0;
+    }
+
+    if (is_basic) {
+        fprintf(stdout, "[Boxoban] Generating basic maps at %s\n", level_dir);
+        return boxoban_generate_basic_maps(level_dir, 0);
+    }
+    if (is_easy) {
+        fprintf(stdout, "[Boxoban] Generating easy maps at %s\n", level_dir);
+        return boxoban_generate_easy_maps(level_dir, 0);
+    }
+    return boxoban_download_text_maps(difficulty);
+}
+
+/*
+ * Writes the bin file path for `difficulty` into `out_path` (capacity `out_cap`).
+ * Returns 0 when the whole path fit, -1 on a formatting error or truncation.
+ */
+static int boxoban_bin_path(const char* difficulty, char* out_path, size_t out_cap) {
+    int needed = snprintf(out_path, out_cap, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", difficulty);
+    int fits = needed > 0 && (size_t)needed < out_cap;
+    return fits ? 0 : -1;
+}
+
+/*
+ * Ensures the binary map file for `difficulty` exists and selects it as the
+ * active map path.
+ *
+ * `out_path` (capacity `out_cap`) receives the bin file path. If that file
+ * does not exist yet, the level text files are generated or downloaded as
+ * needed and converted into the bin. A conversion that fails or yields zero
+ * puzzles removes the output file again: later runs only test for the file's
+ * existence, so a truncated or empty bin would otherwise be reused forever.
+ *
+ * Returns 0 on success, -1 on invalid arguments or any failure.
+ */
+int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
+    if (difficulty == NULL || out_path == NULL) {
+        return -1;
+    }
+    if (boxoban_difficulty_id_from_name(difficulty) < 0) {
+        return -1;
+    }
+    if (boxoban_bin_path(difficulty, out_path, out_cap) != 0) {
+        return -1;
+    }
+
+    if (access(out_path, F_OK) != 0) {
+        if (boxoban_ensure_text_maps(difficulty) != 0) {
+            return -1;
+        }
+
+        BoxobanPathList maps = {0};
+        size_t puzzle_count = 0;
+        if (boxoban_collect_maps(difficulty, &maps) != 0) {
+            boxoban_path_list_free(&maps);
+            return -1;
+        }
+
+        int rc = boxoban_write_bin_from_files((const char* const*)maps.items, maps.count, out_path, 0, &puzzle_count);
+        boxoban_path_list_free(&maps);
+
+        if (rc == 0 && puzzle_count == 0) {
+            fprintf(stderr, "[Boxoban] No valid puzzles found for '%s'\n", difficulty);
+            rc = -1;
+        }
+        if (rc != 0) {
+            /* Best effort: the writer may have created a partial file. */
+            remove(out_path);
+            return -1;
+        }
+        fprintf(stdout, "[Boxoban] Generated %zu puzzles for '%s' at %s\n", puzzle_count, difficulty, out_path);
+    }
+
+    if (boxoban_set_map_path(out_path) != 0) {
+        return -1;
+    }
+    return 0;
+}
+
+/* Unmaps the current map file, if one is mapped, and clears the cached map globals. */
+static void reset_map_cache(void) {
+    int has_mapping = MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0;
+    if (has_mapping) {
+        munmap(MAP_BASE, MAP_FILESIZE);
+    }
+    PUZZLE_COUNT = 0;
+    MAP_FILESIZE = 0;
+    MAP_BASE = NULL;
+}
+
+/*
+ * Selects `path` as the map file to load. The path is copied, so the caller
+ * keeps ownership of its buffer. Choosing a different path drops any current
+ * mapping; choosing the current path again is a no-op.
+ * Returns 0 on success, -1 if `path` is NULL or the copy cannot be allocated.
+ */
+int boxoban_set_map_path(const char *path) {
+    if (path == NULL) {
+        return -1;
+    }
+
+    int unchanged = BOXOBAN_MAP_PATH != NULL && strcmp(BOXOBAN_MAP_PATH, path) == 0;
+    if (unchanged) {
+        return 0;
+    }
+
+    size_t nbytes = strlen(path) + 1;
+    char* dup = malloc(nbytes);
+    if (dup == NULL) {
+        return -1;
+    }
+    memcpy(dup, path, nbytes);
+
+    reset_map_cache();
+    free(BOXOBAN_MAP_PATH);
+    BOXOBAN_MAP_PATH = dup;
+    return 0;
+}
+
+/* Returns the value of the BOXOBAN_MAP_BIN environment variable, or NULL when it is unset. */
+static const char* get_default_map_path(void) {
+    return getenv("BOXOBAN_MAP_BIN");
+}
+
+/*
+ * Maps the active bin file read-only into memory and fills MAP_BASE,
+ * MAP_FILESIZE and PUZZLE_COUNT. Does nothing if a map is already loaded.
+ *
+ * When no path has been selected yet, the BOXOBAN_MAP_BIN environment
+ * variable is used if set; otherwise the "basic" maps are prepared.
+ * Any failure is fatal: a diagnostic is printed and the process aborts.
+ */
+void ensure_map_loaded(void) {
+    if (MAP_BASE != NULL) {
+        return;
+    }
+
+    if (BOXOBAN_MAP_PATH == NULL) {
+        const char* default_path = get_default_map_path();
+        if (default_path != NULL) {
+            if (boxoban_set_map_path(default_path) != 0) {
+                fprintf(stderr, "Failed to set default Boxoban map path\n");
+                abort();
+            }
+        } else {
+            char prepared_path[512];
+            if (boxoban_prepare_maps_for_difficulty("basic", prepared_path, sizeof(prepared_path)) != 0) {
+                fprintf(stderr, "Failed to prepare default Boxoban maps\n");
+                abort();
+            }
+        }
+    }
+
+    int fd = open(BOXOBAN_MAP_PATH, O_RDONLY);
+    if (fd < 0) {
+        perror("open");
+        abort();
+    }
+    struct stat st;
+    if (fstat(fd, &st) != 0) {
+        perror("fstat");
+        close(fd);
+        abort();
+    }
+
+    /* mmap rejects a zero length with EINVAL; report the real cause instead. */
+    if (st.st_size <= 0) {
+        fprintf(stderr, "Boxoban map file %s is empty\n", BOXOBAN_MAP_PATH);
+        close(fd);
+        abort();
+    }
+
+    MAP_FILESIZE = (size_t)st.st_size;
+    if (MAP_FILESIZE % PUZZLE_SIZE != 0) {
+        fprintf(stderr, "Invalid Boxoban map file size %zu (expected multiple of %zu)\n",
+            MAP_FILESIZE, PUZZLE_SIZE);
+        close(fd);
+        abort();
+    }
+    PUZZLE_COUNT = MAP_FILESIZE / PUZZLE_SIZE;
+
+    void* mapped = mmap(NULL, MAP_FILESIZE, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (mapped == MAP_FAILED) {
+        /* Report before close() so it cannot overwrite mmap's errno. */
+        perror("mmap");
+        close(fd);
+        abort();
+    }
+    /* The mapping stays valid after the descriptor is closed. */
+    close(fd);
+    MAP_BASE = mapped;
+}
+
+#endif
+
+#endif
diff --git a/pufferlib/ocean/boxoban/generate_maps.h b/pufferlib/ocean/boxoban/generate_maps.h
new file mode 100644
index 0000000000..cdd39a10b7
--- /dev/null
+++ b/pufferlib/ocean/boxoban/generate_maps.h
@@ -0,0 +1,368 @@
+#ifndef PUFFERLIB_OCEAN_BOXOBAN_GENERATE_MAPS_H
+#define PUFFERLIB_OCEAN_BOXOBAN_GENERATE_MAPS_H
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#define BOXOBAN_GEN_AGENT '@'
+#define BOXOBAN_GEN_WALL '#'
+#define BOXOBAN_GEN_BOX '$'
+#define BOXOBAN_GEN_TARGET '.'
+#define BOXOBAN_GEN_FLOOR ' '
+
+/* A grid coordinate; boxoban_grid_idx(size, r, c) gives its flat index. */
+typedef struct {
+    int r;  /* row index */
+    int c;  /* column index */
+} BoxobanCell;
+
+/* State of the 64-bit generator advanced by boxoban_next_u64. */
+typedef struct {
+    uint64_t state;  /* boxoban_seed replaces a zero seed, so it never starts at 0 */
+} BoxobanRandom;
+
+/*
+ * Creates `dir_path` and any missing parent directories (mode 0777 before
+ * umask), treating components that already exist as success.
+ * Returns 0 on success, -1 if the path is 1024 bytes or longer or a mkdir
+ * fails for a reason other than EEXIST.
+ */
+static int boxoban_mkdir_p(const char* dir_path) {
+    char scratch[1024];
+    const size_t path_len = strlen(dir_path);
+    if (path_len >= sizeof(scratch)) {
+        return -1;
+    }
+    memcpy(scratch, dir_path, path_len + 1);
+
+    /*
+     * Create each ancestor by temporarily cutting the string at every '/'.
+     * Index 0 is skipped so a leading '/' is not treated as a component.
+     */
+    size_t pos = 1;
+    while (pos < path_len) {
+        if (scratch[pos] == '/') {
+            scratch[pos] = '\0';
+            int made = mkdir(scratch, 0777);
+            if (made != 0 && errno != EEXIST) {
+                return -1;
+            }
+            scratch[pos] = '/';
+        }
+        pos++;
+    }
+
+    if (mkdir(scratch, 0777) == 0 || errno == EEXIST) {
+        return 0;
+    }
+    return -1;
+}
+
+/* Seeds the generator; a zero seed is replaced by a fixed non-zero constant. */
+static void boxoban_seed(BoxobanRandom* rng, uint64_t seed) {
+    const uint64_t fallback = 0x9e3779b97f4a7c15ULL;
+    rng->state = (seed != 0) ? seed : fallback;
+}
+
+/*
+ * Advances the generator by one step (three xor-shifts of the state) and
+ * returns the new state multiplied by a fixed odd constant.
+ */
+static uint64_t boxoban_next_u64(BoxobanRandom* rng) {
+    uint64_t s = rng->state;
+    s = s ^ (s >> 12);
+    s = s ^ (s << 25);
+    s = s ^ (s >> 27);
+    rng->state = s;
+    return s * 2685821657736338717ULL;
+}
+
+/*
+ * Returns a value in [0, n) drawn with rejection sampling so the modulo
+ * reduction is unbiased. Returns 0 when n is 0.
+ */
+static uint32_t boxoban_randbelow(BoxobanRandom* rng, uint32_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
+    const uint64_t bound = n;
+    /* (2^64 - n) mod n == 2^64 mod n: draws below this would skew the result. */
+    const uint64_t reject_below = (UINT64_C(0) - bound) % bound;
+
+    uint64_t draw;
+    do {
+        draw = boxoban_next_u64(rng);
+    } while (draw < reject_below);
+    return (uint32_t)(draw % bound);
+}
+
+/* Returns a random integer in the inclusive range [a, b]. */
+static int boxoban_randint(BoxobanRandom* rng, int a, int b) {
+    uint32_t span = (uint32_t)(b - a + 1);
+    int offset = (int)boxoban_randbelow(rng, span);
+    return a + offset;
+}
+
+/* Returns a random index in [0, n). */
+static int boxoban_choice_index(BoxobanRandom* rng, int n) {
+    uint32_t bound = (uint32_t)n;
+    return (int)boxoban_randbelow(rng, bound);
+}
+
+/*
+ * Draws `k` distinct indices from [0, n) without replacement (partial
+ * Fisher-Yates shuffle) and stores them in `out_indices[0..k-1]`.
+ *
+ * Returns 0 on success. Returns -1 when the request is impossible
+ * (n < 0, k < 0, k > n, or a NULL output with k > 0) or allocation fails.
+ * k == 0 succeeds without allocating or consuming random numbers.
+ */
+static int boxoban_sample_indices(BoxobanRandom* rng, int n, int k, int* out_indices) {
+    /* k > n would walk past the end of the pool below. */
+    if (n < 0 || k < 0 || k > n) {
+        return -1;
+    }
+    if (k == 0) {
+        return 0;
+    }
+    if (out_indices == NULL) {
+        return -1;
+    }
+
+    int* pool = (int*)malloc((size_t)n * sizeof(int));
+    if (pool == NULL) {
+        return -1;
+    }
+
+    for (int i = 0; i < n; i++) {
+        pool[i] = i;
+    }
+
+    /* Swap a random not-yet-chosen element into slot i, then emit it. */
+    for (int i = 0; i < k; i++) {
+        int j = i + (int)boxoban_randbelow(rng, (uint32_t)(n - i));
+        int tmp = pool[i];
+        pool[i] = pool[j];
+        pool[j] = tmp;
+        out_indices[i] = pool[i];
+    }
+
+    free(pool);
+    return 0;
+}
+
+/* Flat row-major index of cell (r, c) in a size x size grid. */
+static inline int boxoban_grid_idx(int size, int r, int c) {
+    int row_start = r * size;
+    return row_start + c;
+}
+
+/* Returns 1 when both x and y lie in [0, size), otherwise 0. */
+static int boxoban_is_inside(int size, int x, int y) {
+    if (x < 0 || y < 0) {
+        return 0;
+    }
+    return x < size && y < size;
+}
+
+/*
+ * Returns 1 if the cell at column x, row y has, along the horizontal or the
+ * vertical axis, both neighbours inside the grid and each of them floor or
+ * target; returns 0 otherwise.
+ */
+static int boxoban_is_pushable(const char* grid, int size, int x, int y) {
+    /*
+     * {dx, dy} per axis. The test is symmetric in the two neighbours, so one
+     * direction per axis gives the same answer as checking all four pushes.
+     */
+    static const int axes[2][2] = {{1, 0}, {0, 1}};
+
+    for (int a = 0; a < 2; a++) {
+        int step_x = axes[a][0];
+        int step_y = axes[a][1];
+        int ax = x - step_x;
+        int ay = y - step_y;
+        int bx = x + step_x;
+        int by = y + step_y;
+        if (!boxoban_is_inside(size, ax, ay) || !boxoban_is_inside(size, bx, by)) {
+            continue;
+        }
+
+        char before = grid[boxoban_grid_idx(size, ay, ax)];
+        char after = grid[boxoban_grid_idx(size, by, bx)];
+        int before_free = (before == BOXOBAN_GEN_FLOOR || before == BOXOBAN_GEN_TARGET);
+        int after_free = (after == BOXOBAN_GEN_FLOOR || after == BOXOBAN_GEN_TARGET);
+        if (before_free && after_free) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Fills a size x size grid with floor and surrounds it with a one-cell wall border. */
+static void boxoban_build_border_grid(char* grid, int size) {
+    for (int r = 0; r < size; r++) {
+        char* row = grid + boxoban_grid_idx(size, r, 0);
+        for (int c = 0; c < size; c++) {
+            row[c] = BOXOBAN_GEN_FLOOR;
+        }
+    }
+
+    const int last = size - 1;
+    for (int i = 0; i < size; i++) {
+        grid[boxoban_grid_idx(size, 0, i)] = BOXOBAN_GEN_WALL;     /* top edge */
+        grid[boxoban_grid_idx(size, last, i)] = BOXOBAN_GEN_WALL;  /* bottom edge */
+        grid[boxoban_grid_idx(size, i, 0)] = BOXOBAN_GEN_WALL;     /* left edge */
+        grid[boxoban_grid_idx(size, i, last)] = BOXOBAN_GEN_WALL;  /* right edge */
+    }
+}
+
+/*
+ * Lists, in row-major order, every cell that lies at least `margin` cells
+ * inside the wall border of a size x size grid. Writes them to `out_cells`
+ * and returns how many were written.
+ */
+static int boxoban_build_cells(int size, int margin, BoxobanCell* out_cells) {
+    const int lo = 1 + margin;
+    const int hi = size - 1 - margin;  /* exclusive */
+    int written = 0;
+
+    for (int r = lo; r < hi; r++) {
+        for (int c = lo; c < hi; c++) {
+            out_cells[written++] = (BoxobanCell){r, c};
+        }
+    }
+    return written;
+}
+
+/*
+ * Samples one random puzzle into `grid` (size x size characters).
+ *
+ * Each attempt rebuilds the walled grid, picks `num_boxes` target cells and
+ * `num_boxes` box cells from `confined` (disjoint from each other), then an
+ * agent cell from the entries of `agent_choices` not taken by a target or
+ * box. The attempt is accepted when every box passes boxoban_is_pushable.
+ *
+ * Returns 0 with the puzzle in `grid`, or -1 if the request is invalid, an
+ * allocation fails, or no attempt out of `max_attempts` is accepted.
+ */
+static int boxoban_make_puzzle(
+    int size,
+    BoxobanRandom* rng,
+    int num_boxes,
+    int max_attempts,
+    const BoxobanCell* agent_choices,
+    int agent_count,
+    const BoxobanCell* confined,
+    int confined_count,
+    int interior_count,
+    char* grid
+) {
+    if (num_boxes < 1) {
+        fprintf(stderr, "num_boxes must be at least 1\n");
+        return -1;
+    }
+
+    int needed = num_boxes * 2 + 1;
+    if (needed > interior_count) {
+        fprintf(stderr, "Grid interior only has %d cells, cannot place %d objects\n", interior_count, needed);
+        return -1;
+    }
+
+    /*
+     * Targets and boxes both come from `confined`, so it must hold all of
+     * them; asking the sampler for more cells than exist is out of range.
+     */
+    if (num_boxes * 2 > confined_count) {
+        fprintf(stderr, "Confined region only has %d cells, cannot place %d targets and boxes\n",
+            confined_count, num_boxes * 2);
+        return -1;
+    }
+
+    int rc = -1;
+    BoxobanCell* box_candidates = (BoxobanCell*)malloc((size_t)confined_count * sizeof(BoxobanCell));
+    BoxobanCell* box_positions = (BoxobanCell*)malloc((size_t)num_boxes * sizeof(BoxobanCell));
+    BoxobanCell* agent_candidates = (BoxobanCell*)malloc((size_t)agent_count * sizeof(BoxobanCell));
+    int* sampled_idx = (int*)malloc((size_t)num_boxes * sizeof(int));
+    uint8_t* occupied = (uint8_t*)calloc((size_t)size * (size_t)size, sizeof(uint8_t));
+    if (box_candidates == NULL || box_positions == NULL || agent_candidates == NULL || sampled_idx == NULL || occupied == NULL) {
+        goto cleanup;
+    }
+
+    for (int attempt = 0; attempt < max_attempts; attempt++) {
+        boxoban_build_border_grid(grid, size);
+        memset(occupied, 0, (size_t)size * (size_t)size);
+
+        /* Targets. */
+        if (boxoban_sample_indices(rng, confined_count, num_boxes, sampled_idx) != 0) {
+            goto cleanup;
+        }
+        for (int i = 0; i < num_boxes; i++) {
+            BoxobanCell cell = confined[sampled_idx[i]];
+            grid[boxoban_grid_idx(size, cell.r, cell.c)] = BOXOBAN_GEN_TARGET;
+            occupied[boxoban_grid_idx(size, cell.r, cell.c)] = 1;
+        }
+
+        /* Boxes go on confined cells that are not targets. */
+        int box_candidate_count = 0;
+        for (int i = 0; i < confined_count; i++) {
+            BoxobanCell cell = confined[i];
+            if (!occupied[boxoban_grid_idx(size, cell.r, cell.c)]) {
+                box_candidates[box_candidate_count++] = cell;
+            }
+        }
+        if (box_candidate_count < num_boxes) {
+            continue;
+        }
+
+        if (boxoban_sample_indices(rng, box_candidate_count, num_boxes, sampled_idx) != 0) {
+            goto cleanup;
+        }
+        for (int i = 0; i < num_boxes; i++) {
+            BoxobanCell cell = box_candidates[sampled_idx[i]];
+            box_positions[i] = cell;
+            grid[boxoban_grid_idx(size, cell.r, cell.c)] = BOXOBAN_GEN_BOX;
+            occupied[boxoban_grid_idx(size, cell.r, cell.c)] = 1;
+        }
+
+        /* The agent takes any allowed cell that holds neither target nor box. */
+        int agent_candidate_count = 0;
+        for (int i = 0; i < agent_count; i++) {
+            BoxobanCell cell = agent_choices[i];
+            if (!occupied[boxoban_grid_idx(size, cell.r, cell.c)]) {
+                agent_candidates[agent_candidate_count++] = cell;
+            }
+        }
+        if (agent_candidate_count == 0) {
+            continue;
+        }
+
+        BoxobanCell agent_cell = agent_candidates[boxoban_choice_index(rng, agent_candidate_count)];
+        grid[boxoban_grid_idx(size, agent_cell.r, agent_cell.c)] = BOXOBAN_GEN_AGENT;
+
+        int all_pushable = 1;
+        for (int i = 0; i < num_boxes; i++) {
+            BoxobanCell cell = box_positions[i];
+            if (!boxoban_is_pushable(grid, size, cell.c, cell.r)) {
+                all_pushable = 0;
+                break;
+            }
+        }
+
+        if (all_pushable) {
+            rc = 0;
+            goto cleanup;
+        }
+    }
+
+    fprintf(stderr, "Failed to sample a solvable puzzle after many attempts\n");
+
+cleanup:
+    free(box_candidates);
+    free(box_positions);
+    free(agent_candidates);
+    free(sampled_idx);
+    free(occupied);
+    return rc;
+}
+
+/*
+ * Writes `num_files` level files ("000.txt", "001.txt", ...) into
+ * `output_dir`, each holding `puzzles_per_file` random size x size puzzles.
+ * Every puzzle is written as a "; <index>" line, the grid rows, and a blank line.
+ *
+ * When `num_boxes` >= 1 every puzzle has exactly that many boxes; otherwise
+ * the count is drawn per puzzle from [min_boxes, max_boxes].
+ *
+ * Returns 0 on success. Returns -1 on an invalid box range, if the directory
+ * or a file cannot be created, on allocation or write failure, or if a
+ * puzzle cannot be sampled.
+ */
+static int boxoban_generate_maps(
+    const char* output_dir,
+    int num_files,
+    int puzzles_per_file,
+    int size,
+    int num_boxes,
+    int min_boxes,
+    int max_boxes,
+    uint64_t seed
+) {
+    /* An empty or non-positive range would hand boxoban_randint a bogus span. */
+    if (num_boxes < 1 && (min_boxes < 1 || max_boxes < min_boxes)) {
+        fprintf(stderr, "Invalid box count range [%d, %d]\n", min_boxes, max_boxes);
+        return -1;
+    }
+
+    if (boxoban_mkdir_p(output_dir) != 0) {
+        return -1;
+    }
+
+    BoxobanRandom rng;
+    boxoban_seed(&rng, seed);
+
+    int rc = -1;
+    FILE* out = NULL;
+    int max_cells = (size - 2) * (size - 2);
+    BoxobanCell* agent_choices = (BoxobanCell*)malloc((size_t)max_cells * sizeof(BoxobanCell));
+    BoxobanCell* confined = (BoxobanCell*)malloc((size_t)max_cells * sizeof(BoxobanCell));
+    char* grid = (char*)malloc((size_t)size * (size_t)size);
+    if (agent_choices == NULL || confined == NULL || grid == NULL) {
+        goto cleanup;
+    }
+
+    int interior_count = max_cells;
+    /* The agent may stand anywhere inside the walls; boxes and targets stay one cell further in. */
+    int agent_count = boxoban_build_cells(size, 0, agent_choices);
+    int confined_count = boxoban_build_cells(size, 1, confined);
+
+    for (int file_idx = 0; file_idx < num_files; file_idx++) {
+        char out_path[1200];
+        int path_len = snprintf(out_path, sizeof(out_path), "%s/%03d.txt", output_dir, file_idx);
+        if (path_len < 0 || (size_t)path_len >= sizeof(out_path)) {
+            goto cleanup;
+        }
+        out = fopen(out_path, "w");
+        if (out == NULL) {
+            goto cleanup;
+        }
+
+        for (int puzzle_idx = 0; puzzle_idx < puzzles_per_file; puzzle_idx++) {
+            int box_count = num_boxes >= 1 ? num_boxes : boxoban_randint(&rng, min_boxes, max_boxes);
+            if (boxoban_make_puzzle(
+                    size, &rng, box_count, 200, agent_choices, agent_count, confined, confined_count, interior_count, grid) != 0) {
+                goto cleanup;
+            }
+
+            fprintf(out, "; %d\n", puzzle_idx);
+            for (int r = 0; r < size; r++) {
+                fwrite(&grid[boxoban_grid_idx(size, r, 0)], 1, (size_t)size, out);
+                fputc('\n', out);
+            }
+            fputc('\n', out);
+        }
+
+        /* A short write (e.g. disk full) shows up in ferror or fclose. */
+        int write_failed = ferror(out);
+        int close_failed = (fclose(out) != 0);
+        out = NULL;
+        if (write_failed || close_failed) {
+            fprintf(stderr, "Failed to write %s\n", out_path);
+            goto cleanup;
+        }
+    }
+    rc = 0;
+
+cleanup:
+    if (out != NULL) {
+        fclose(out);
+    }
+    free(agent_choices);
+    free(confined);
+    free(grid);
+    return rc;
+}
+
+/* Generates the "easy" set: 300 files of 1000 puzzles on a 10x10 grid, 1 to 4 boxes per puzzle. */
+static int boxoban_generate_easy_maps(const char* output_dir, uint64_t seed) {
+    const int num_files = 300;
+    const int puzzles_per_file = 1000;
+    const int grid_size = 10;
+    const int fixed_boxes = -1;  /* below 1: draw the count from [min_boxes, max_boxes] */
+    const int min_boxes = 1;
+    const int max_boxes = 4;
+    return boxoban_generate_maps(
+        output_dir, num_files, puzzles_per_file, grid_size, fixed_boxes, min_boxes, max_boxes, seed);
+}
+
+/* Generates the "basic" set: 300 files of 1000 puzzles on a 10x10 grid, exactly one box per puzzle. */
+static int boxoban_generate_basic_maps(const char* output_dir, uint64_t seed) {
+    const int num_files = 300;
+    const int puzzles_per_file = 1000;
+    const int grid_size = 10;
+    const int fixed_boxes = 1;  /* 1 or more: the range arguments below are not used */
+    const int min_boxes = 1;
+    const int max_boxes = 4;
+    return boxoban_generate_maps(
+        output_dir, num_files, puzzles_per_file, grid_size, fixed_boxes, min_boxes, max_boxes, seed);
+}
+
+#endif
diff --git a/pufferlib/ocean/boxoban/parse_maps.h b/pufferlib/ocean/boxoban/parse_maps.h
new file mode 100644
index 0000000000..79e79ed8e1
--- /dev/null
+++ b/pufferlib/ocean/boxoban/parse_maps.h
@@ -0,0 +1,252 @@
+#ifndef PUFFERLIB_OCEAN_BOXOBAN_PARSE_MAPS_H
+#define PUFFERLIB_OCEAN_BOXOBAN_PARSE_MAPS_H
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#define BOXOBAN_AGENT_CHAR '@'
+#define BOXOBAN_WALL_CHAR '#'
+#define BOXOBAN_BOX_CHAR '$'
+#define BOXOBAN_TARGET_CHAR '.'
+#define BOXOBAN_BOX_ON_TARGET_CHAR '*'
+#define BOXOBAN_AGENT_ON_TARGET_CHAR '+'
+
+#define BOXOBAN_EXPECTED_ROWS 10
+#define BOXOBAN_EXPECTED_COLS 10
+#define BOXOBAN_PUZZLE_OBS_BYTES (4 * BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS)
+#define BOXOBAN_PUZZLE_META_BYTES 5
+#define BOXOBAN_PUZZLE_BYTES (BOXOBAN_PUZZLE_OBS_BYTES + BOXOBAN_PUZZLE_META_BYTES)
+
+/* Rows of one puzzle, accumulated while a level text file is parsed. */
+typedef struct {
+    char rows[BOXOBAN_EXPECTED_ROWS][BOXOBAN_EXPECTED_COLS];  /* at most the first BOXOBAN_EXPECTED_COLS characters of each row */
+    int row_lengths[BOXOBAN_EXPECTED_ROWS];  /* full length of each source line, before clipping */
+    int row_count;  /* rows collected so far */
+} BoxobanPuzzleDraft;
+
+/* Returns 1 when `line` is empty or consists only of whitespace, otherwise 0. */
+static int boxoban_is_blank_line(const char* line) {
+    for (const unsigned char* cur = (const unsigned char*)line; *cur != '\0'; cur++) {
+        if (!isspace(*cur)) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/*
+ * Checks that the draft has exactly BOXOBAN_EXPECTED_ROWS rows of
+ * BOXOBAN_EXPECTED_COLS characters each.
+ * Returns 0 and clears `reason` when the shape is valid; otherwise returns -1
+ * with a description of the first mismatch in `reason`.
+ */
+static int boxoban_validate_shape(const BoxobanPuzzleDraft* draft, char* reason, size_t reason_cap) {
+    const int rows = draft->row_count;
+    if (rows != BOXOBAN_EXPECTED_ROWS) {
+        snprintf(reason, reason_cap, "expected %d rows, got %d", BOXOBAN_EXPECTED_ROWS, rows);
+        return -1;
+    }
+
+    /* Advance to the first row of the wrong width, if there is one. */
+    int r = 0;
+    while (r < BOXOBAN_EXPECTED_ROWS && draft->row_lengths[r] == BOXOBAN_EXPECTED_COLS) {
+        r++;
+    }
+    if (r < BOXOBAN_EXPECTED_ROWS) {
+        snprintf(reason, reason_cap, "row %d expected %d cols, got %d",
+            r, BOXOBAN_EXPECTED_COLS, draft->row_lengths[r]);
+        return -1;
+    }
+
+    reason[0] = '\0';
+    return 0;
+}
+
+/*
+ * Encodes a shape-validated draft and appends it to `out` as one record:
+ * four row-major occupancy planes of one byte per cell, in the order agent,
+ * walls, boxes, targets, followed by 5 metadata bytes (agent x, agent y,
+ * box count, target count, boxes already on a target).
+ *
+ * Returns 0 and clears `reason` on success. Returns -1 with a message in
+ * `reason` when the puzzle has no agent or more than one; returns -1 without
+ * touching `reason` when a write fails.
+ */
+static int boxoban_encode_and_write_puzzle(const BoxobanPuzzleDraft* draft, FILE* out, char* reason, size_t reason_cap) {
+    uint8_t agent[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0};
+    uint8_t walls[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0};
+    uint8_t boxes[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0};
+    uint8_t targets[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0};
+    uint8_t meta[BOXOBAN_PUZZLE_META_BYTES] = {0};
+
+    int agent_col = -1;
+    int agent_row = -1;
+    int box_total = 0;
+    int target_total = 0;
+    int boxes_on_target = 0;
+
+    for (int cell = 0; cell < BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS; cell++) {
+        const int r = cell / BOXOBAN_EXPECTED_COLS;
+        const int c = cell % BOXOBAN_EXPECTED_COLS;
+
+        int has_agent = 0;
+        int has_wall = 0;
+        int has_box = 0;
+        int has_target = 0;
+        switch (draft->rows[r][c]) {
+        case BOXOBAN_AGENT_CHAR:
+            has_agent = 1;
+            break;
+        case BOXOBAN_AGENT_ON_TARGET_CHAR:
+            has_agent = 1;
+            has_target = 1;
+            break;
+        case BOXOBAN_WALL_CHAR:
+            has_wall = 1;
+            break;
+        case BOXOBAN_BOX_CHAR:
+            has_box = 1;
+            break;
+        case BOXOBAN_BOX_ON_TARGET_CHAR:
+            has_box = 1;
+            has_target = 1;
+            break;
+        case BOXOBAN_TARGET_CHAR:
+            has_target = 1;
+            break;
+        default:
+            /* Any other character is an empty cell in every plane. */
+            break;
+        }
+
+        if (has_agent) {
+            if (agent_col != -1) {
+                snprintf(reason, reason_cap, "Puzzle has multiple agents");
+                return -1;
+            }
+            agent_col = c;
+            agent_row = r;
+        }
+
+        box_total += has_box;
+        target_total += has_target;
+        boxes_on_target += (has_box && has_target);
+
+        agent[cell] = (uint8_t)has_agent;
+        walls[cell] = (uint8_t)has_wall;
+        boxes[cell] = (uint8_t)has_box;
+        targets[cell] = (uint8_t)has_target;
+    }
+
+    if (agent_col == -1) {
+        snprintf(reason, reason_cap, "Puzzle has no agent");
+        return -1;
+    }
+
+    meta[0] = (uint8_t)agent_col;
+    meta[1] = (uint8_t)agent_row;
+    meta[2] = (uint8_t)box_total;
+    meta[3] = (uint8_t)target_total;
+    meta[4] = (uint8_t)boxes_on_target;
+
+    const uint8_t* planes[] = {agent, walls, boxes, targets};
+    for (size_t p = 0; p < sizeof(planes) / sizeof(planes[0]); p++) {
+        if (fwrite(planes[p], 1, sizeof(agent), out) != sizeof(agent)) {
+            return -1;
+        }
+    }
+    if (fwrite(meta, 1, sizeof(meta), out) != sizeof(meta)) {
+        return -1;
+    }
+
+    reason[0] = '\0';
+    return 0;
+}
+
+/*
+ * Closes the current draft: validates it, writes it to `out` if it is well
+ * formed, and resets the draft for the next puzzle.
+ *
+ * `*puzzle_idx` is incremented for every draft, `*written_count` only for
+ * puzzles actually written. Malformed puzzles are logged and skipped
+ * (return 0). Returns -1 only when encoding fails without a reason message,
+ * which is how a write error is reported; the draft is not reset then.
+ */
+static int boxoban_finalize_puzzle(
+    BoxobanPuzzleDraft* draft,
+    FILE* out,
+    const char* src_path,
+    size_t* puzzle_idx,
+    size_t* written_count
+) {
+    char reason[128];
+    const size_t idx = (*puzzle_idx)++;
+
+    int status = boxoban_validate_shape(draft, reason, sizeof(reason));
+    if (status == 0) {
+        status = boxoban_encode_and_write_puzzle(draft, out, reason, sizeof(reason));
+        if (status != 0 && reason[0] == '\0') {
+            return -1;
+        }
+    }
+
+    if (status != 0) {
+        fprintf(stdout, "[Boxoban] Skipping malformed puzzle in %s puzzle#%zu: %s\n", src_path, idx, reason);
+    } else {
+        (*written_count)++;
+    }
+
+    draft->row_count = 0;
+    return 0;
+}
+
+/*
+ * Parses the level text files in `files` (in order) and writes every
+ * well-formed puzzle to the binary file `out_path`.
+ *
+ * Text format: a line starting with ';' separates puzzles, blank lines are
+ * ignored, and every other line is one grid row. A puzzle is finalized once
+ * BOXOBAN_EXPECTED_ROWS rows are collected, at the next ';' line, or at end
+ * of file. Both LF and CRLF line endings are accepted.
+ *
+ * On success returns 0 and, if `out_puzzle_count` is not NULL, stores the
+ * number of puzzles written. On any failure (open, read, write, or size
+ * mismatch) returns -1 and removes the partially written output file.
+ */
+static int boxoban_write_bin_from_files(
+    const char* const* files,
+    size_t file_count,
+    const char* out_path,
+    int verbose,
+    size_t* out_puzzle_count
+) {
+    FILE* out = fopen(out_path, "wb");
+    if (out == NULL) {
+        return -1;
+    }
+
+    int rc = -1;
+    FILE* in = NULL;
+    char* line = NULL;  /* getline buffer, reused across files */
+    size_t line_cap = 0;
+    size_t puzzle_count = 0;
+    long bytes_written = 0;
+    size_t expected = 0;
+
+    for (size_t file_idx = 0; file_idx < file_count; file_idx++) {
+        const char* src_path = files[file_idx];
+        in = fopen(src_path, "r");
+        if (in == NULL) {
+            goto cleanup;
+        }
+
+        ssize_t line_len;
+        BoxobanPuzzleDraft draft;
+        memset(&draft, 0, sizeof(draft));
+        size_t puzzle_idx = 0;
+
+        while ((line_len = getline(&line, &line_cap, in)) != -1) {
+            if (line_len > 0 && line[line_len - 1] == '\n') {
+                line[--line_len] = '\0';
+            }
+            /* A leftover '\r' from CRLF input would make every row one character too long. */
+            if (line_len > 0 && line[line_len - 1] == '\r') {
+                line[--line_len] = '\0';
+            }
+
+            if (line[0] == ';') {
+                if (draft.row_count > 0) {
+                    if (boxoban_finalize_puzzle(&draft, out, src_path, &puzzle_idx, &puzzle_count) != 0) {
+                        goto cleanup;
+                    }
+                }
+                continue;
+            }
+
+            if (boxoban_is_blank_line(line)) {
+                continue;
+            }
+
+            if (draft.row_count < BOXOBAN_EXPECTED_ROWS) {
+                int dst_row = draft.row_count;
+                int copy_len = line_len < BOXOBAN_EXPECTED_COLS ? (int)line_len : BOXOBAN_EXPECTED_COLS;
+                memcpy(draft.rows[dst_row], line, (size_t)copy_len);
+                /* Keep the real length so over-long rows are rejected later. */
+                draft.row_lengths[dst_row] = (int)line_len;
+                draft.row_count++;
+            }
+
+            if (draft.row_count == BOXOBAN_EXPECTED_ROWS) {
+                if (boxoban_finalize_puzzle(&draft, out, src_path, &puzzle_idx, &puzzle_count) != 0) {
+                    goto cleanup;
+                }
+            }
+        }
+
+        /* getline returns -1 for both EOF and a read error. */
+        if (ferror(in)) {
+            goto cleanup;
+        }
+
+        /* Rows left over at end of file form an incomplete puzzle; report it like any other. */
+        if (draft.row_count > 0) {
+            if (boxoban_finalize_puzzle(&draft, out, src_path, &puzzle_idx, &puzzle_count) != 0) {
+                goto cleanup;
+            }
+        }
+
+        fclose(in);
+        in = NULL;
+    }
+
+    if (fflush(out) != 0) {
+        goto cleanup;
+    }
+
+    bytes_written = ftell(out);
+    if (bytes_written < 0) {
+        goto cleanup;
+    }
+
+    expected = puzzle_count * BOXOBAN_PUZZLE_BYTES;
+    if ((size_t)bytes_written != expected) {
+        fprintf(stderr, "Wrong output size: got %ld expected %zu\n", bytes_written, expected);
+        goto cleanup;
+    }
+    rc = 0;
+
+cleanup:
+    free(line);
+    if (in != NULL) {
+        fclose(in);
+    }
+    if (fclose(out) != 0) {
+        rc = -1;
+    }
+    if (rc != 0) {
+        /* Do not leave a truncated bin behind for a later run to pick up. */
+        remove(out_path);
+        return -1;
+    }
+
+    if (verbose) {
+        fprintf(stdout, "Wrote %zu puzzles to %s\n", puzzle_count, out_path);
+    }
+    if (out_puzzle_count != NULL) {
+        *out_puzzle_count = puzzle_count;
+    }
+    return 0;
+}
+
+#endif
diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/Crate_Black.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/Crate_Black.png
new file mode 100644
index 0000000000..e90a03e0ee
Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/Crate_Black.png differ
diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Black.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Black.png
new file mode 100644
index 0000000000..6d52856da3
Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Black.png differ
diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Blue.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Blue.png
new file mode 100644
index 0000000000..e5f0be4fd3
Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Blue.png differ
diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/GroundGravel_Concrete.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/GroundGravel_Concrete.png
new file mode 100644
index 0000000000..aa04f80b8d
Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/GroundGravel_Concrete.png differ
diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/Wall_Black.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/Wall_Black.png
new file mode 100644
index 0000000000..fe7a6bcbde
Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/Wall_Black.png differ
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
index 6c56a4ea20..8abf6ce394 100644
--- a/pufferlib/ocean/environment.py
+++ b/pufferlib/ocean/environment.py
@@ -120,6 +120,7 @@ def make_multiagent(buf=None, **kwargs):
     'battle': 'Battle',
     'breakout': 'Breakout',
     'blastar': 'Blastar',
+    'boxoban': 'Boxoban',
     'convert': 'Convert',
     'convert_circle': 'ConvertCircle',
     'pong': 'Pong',
diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py
index c7663c5f5b..ffb0318c3f 100644
--- a/pufferlib/ocean/torch.py
+++ b/pufferlib/ocean/torch.py
@@ -18,6 +18,102 @@
 from pufferlib.pytorch import layer_init, _nativize_dtype, nativize_tensor
 import numpy as np
 
+class Boxoban(nn.Module):
+    """Embedding + MLP policy for Boxoban with a discrete, multidiscrete or Box action head.
+
+    Observations: always (B, 400) = 4 * (10*10), planes concatenated:
+      [agent_plane(100), target_plane(100), box_plane(100), wall_plane(100)]
+    Each plane is binary/float occupancy. Target+box can co-locate naturally.
+
+    Embedding per cell:
+      cell_vec = pos_embed[cell] + sum_{type} plane[type, cell] * type_embed[type]
+    """
+
+    def __init__(self, env, hidden_size=128, embed_dim=8):
+        """Build the embeddings, the MLP encoder and the action/value heads.
+
+        Args:
+            env: provides `single_action_space`, whose type selects the action head.
+            hidden_size: width of the encoder output fed to both heads.
+            embed_dim: size of each per-cell type/position embedding.
+        """
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.embed_dim = embed_dim
+
+        self.is_multidiscrete = isinstance(env.single_action_space, pufferlib.spaces.MultiDiscrete)
+        self.is_continuous = isinstance(env.single_action_space, pufferlib.spaces.Box)
+
+        # Fixed layout: 4 entity planes over the 100 cells of a 10x10 grid.
+        self.num_types = 4
+        self.num_cells = 100
+        self.obs_n = self.num_types * self.num_cells
+
+        self.type_embed = nn.Embedding(self.num_types, self.embed_dim)
+        self.pos_embed = nn.Embedding(self.num_cells, self.embed_dim)
+
+        self.encoder = nn.Sequential(
+            pufferlib.pytorch.layer_init(nn.Linear(self.num_cells * self.embed_dim, 2 * hidden_size)),
+            nn.GELU(),
+            pufferlib.pytorch.layer_init(nn.Linear(2 * hidden_size, hidden_size)),
+            nn.GELU(),
+            pufferlib.pytorch.layer_init(nn.Linear(hidden_size, hidden_size)),
+            nn.GELU(),
+        )
+
+        if self.is_multidiscrete:
+            self.action_nvec = tuple(env.single_action_space.nvec)
+            self.decoder = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, sum(self.action_nvec)), std=0.01)
+        elif self.is_continuous:
+            num_atns = env.single_action_space.shape[0]
+            self.decoder_mean = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, num_atns), std=0.01)
+            self.decoder_logstd = nn.Parameter(torch.zeros(1, num_atns))
+        else:
+            self.decoder = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, env.single_action_space.n), std=0.01)
+
+        self.value = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, 1), std=1.0)
+
+    def forward_eval(self, observations, state=None):
+        """Return (logits, values) for a batch of observations; `state` is not used."""
+        hidden = self.encode_observations(observations, state=state)
+        return self.decode_actions(hidden)
+
+    def forward(self, observations, state=None):
+        """Alias for `forward_eval`."""
+        return self.forward_eval(observations, state)
+
+    def encode_observations(self, observations, state=None):
+        """Encode (B, 400) plane observations into the encoder's output features.
+
+        Raises ValueError unless `observations` is 2-D with `obs_n` (400) columns.
+        """
+        x = observations
+        if x.dim() != 2 or x.shape[1] != self.obs_n:
+            raise ValueError(f"Expected observations shape (B, {self.obs_n}), got {tuple(x.shape)}")
+        B = x.shape[0]
+        # Match the embedding dtype: `x @ weight` raises on mixed dtypes (e.g. fp16 obs, fp32 weights).
+        x = x.to(self.type_embed.weight.dtype)
+
+        # (B, 400) -> (B, 4, 100) -> (B, 100, 4)
+        x = x.view(B, self.num_types, self.num_cells).permute(0, 2, 1).contiguous()
+
+        # Occupancy-weighted sum of type embeddings plus a per-cell position embedding.
+        cell_vec = x @ self.type_embed.weight + self.pos_embed.weight  # (B, 100, embed_dim)
+        return self.encoder(cell_vec.view(B, self.num_cells * self.embed_dim))
+
+    def decode_actions(self, hidden):
+        """Map encoder features to (logits, values).
+
+        `logits`: tuple of tensors (multidiscrete), a Normal distribution (continuous) or one tensor (discrete).
+        """
+        values = self.value(hidden)
+        if self.is_multidiscrete:
+            return self.decoder(hidden).split(self.action_nvec, dim=1), values
+        if self.is_continuous:
+            mean = self.decoder_mean(hidden)
+            return torch.distributions.Normal(mean, torch.exp(self.decoder_logstd.expand_as(mean))), values
+        return self.decoder(hidden), values
+
 
 class Boids(nn.Module):
     def __init__(self, env, cnn_channels=32, hidden_size=128, **kwargs):