PufferAI · TBBristol · Dec 1, 2025 · Dec 2, 2025 · Dec 3, 2025 · Dec 3, 2025
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+boxoban_maps_*.bin
+
 # Annoying temp files generated by Cython
 c_*.c
 pufferlib/extensions.c
@@ -162,3 +164,5 @@ pufferlib/ocean/impulse_wars/*-release/
 pufferlib/ocean/impulse_wars/debug-*/
 pufferlib/ocean/impulse_wars/release-*/
 pufferlib/ocean/impulse_wars/benchmark/
+
+*.dSYM/
diff --git a/pufferlib/config/ocean/boxoban.ini b/pufferlib/config/ocean/boxoban.ini
@@ -0,0 +1,57 @@
+[base]
+package = ocean
+env_name = puffer_boxoban
+policy_name = Policy
+rnn_name = Recurrent
+
+[vec]
+num_envs = 2
+
+[env]
+num_envs = 1024
+#0 basic, 1 easy, 2 medium, 3 hard, 4 unfiltered
+difficulty = 1
+#reward per intermediate target (once per episode)
+int_r_coeff = 0.25
+#moving box off target
+target_loss_pen_coeff = 0.0
+
+[policy]
+
+
+[train]
+
+#EASY
+adam_beta1 = 0.8731132476489148
+adam_beta2 = 0.97965686417704
+adam_eps = 0.00000000008123794869
+anneal_lr = "true"
+batch_size = "auto"
+bptt_horizon = 64
+clip_coef = 0.01
+ent_coef = 0.01595981947421829
+gae_lambda = 0.6982154990440731
+gamma = 0.98663093763856
+learning_rate = 0.03199264297422195
+max_grad_norm = 0.5768091592872416
+max_minibatch_size = 32768
+min_lr_ratio = 0.37872027027338984
+minibatch_size = 8192
+optimizer = "muon"
+precision = "float32"
+prio_alpha = 0.99
+prio_beta0 = 0.930949266538068
+total_timesteps = 82565313
+update_epochs = 1
+use_rnn = true
+vf_clip_coef = 2.9197817585307435
+vf_coef = 4.787362674459031
+vtrace_c_clip = 4.90924508575585
+vtrace_rho_clip = 4.073806432722373
+
+
+[sweep.train.minibatch_size]
+distribution = uniform_pow2
+min = 4096
+max = 32768
+scale = auto
diff --git a/pufferlib/ocean/boxoban/binding.c b/pufferlib/ocean/boxoban/binding.c
@@ -0,0 +1,66 @@
+#define BOXOBAN_MAPS_IMPLEMENTATION //enables mmap
+#include "boxoban.h"
+#define Env Boxoban
+#include "../env_binding.h"
+
+/*
+ * Resolve the optional "difficulty" kwarg to a Boxoban difficulty id.
+ *
+ * The value may be an int, validated with boxoban_difficulty_name_from_id, or
+ * a string name, resolved with boxoban_difficulty_id_from_name. When the key
+ * is absent the id defaults to 0.
+ *
+ * On success stores the id in *out_difficulty_id and returns 0. On failure
+ * returns -1 with a Python exception set (ValueError for a bad value,
+ * TypeError for an unsupported type) and leaves *out_difficulty_id untouched.
+ */
+static int parse_difficulty_id(PyObject* kwargs, int* out_difficulty_id) {
+    int difficulty_id = 0;
+    PyObject* difficulty_obj = PyDict_GetItemString(kwargs, "difficulty");
+    if (difficulty_obj != NULL) {
+        if (PyLong_Check(difficulty_obj)) {
+            long parsed_id = PyLong_AsLong(difficulty_obj);
+            if (parsed_id == -1 && PyErr_Occurred()) {
+                // The Python int does not fit in a C long. Report it with the
+                // same exception type as every other out-of-range id instead
+                // of letting the -1 sentinel be formatted as the user's value.
+                PyErr_Clear();
+                PyErr_SetString(
+                    PyExc_ValueError,
+                    "Boxoban 'difficulty' int must be in [0, 4] (0=basic, 1=easy, 2=medium, 3=hard, 4=unfiltered)"
+                );
+                return -1;
+            }
+            // Validate only after checking the value survives narrowing to
+            // int; otherwise something like 2^32 + 1 could truncate to a
+            // valid id and be silently accepted.
+            int narrowed_id = (int)parsed_id;
+            if ((long)narrowed_id != parsed_id ||
+                boxoban_difficulty_name_from_id(narrowed_id) == NULL) {
+                PyErr_Format(
+                    PyExc_ValueError,
+                    "Boxoban 'difficulty' int must be in [0, 4], got %ld (0=basic, 1=easy, 2=medium, 3=hard, 4=unfiltered)",
+                    parsed_id
+                );
+                return -1;
+            }
+            difficulty_id = narrowed_id;
+        } else if (PyUnicode_Check(difficulty_obj)) {
+            const char* difficulty_name = PyUnicode_AsUTF8(difficulty_obj);
+            if (difficulty_name == NULL) {
+                // PyUnicode_AsUTF8 has already set the exception.
+                return -1;
+            }
+            difficulty_id = boxoban_difficulty_id_from_name(difficulty_name);
+            if (difficulty_id < 0) {
+                PyErr_Format(
+                    PyExc_ValueError,
+                    "Boxoban 'difficulty' string must be one of: basic, easy, medium, hard, unfiltered (got '%s')",
+                    difficulty_name
+                );
+                return -1;
+            }
+        } else {
+            PyErr_SetString(
+                PyExc_TypeError,
+                "Boxoban 'difficulty' must be an int (0..4) or string (basic/easy/medium/hard/unfiltered)"
+            );
+            return -1;
+        }
+    }
+    *out_difficulty_id = difficulty_id;
+    return 0;
+}
+
+/*
+ * Binding hook: configure one Boxoban env from the Python kwargs and
+ * initialise it.
+ *
+ * Returns 0 on success, or -1 when the "difficulty" kwarg could not be parsed
+ * (parse_difficulty_id has then set the Python exception).
+ */
+static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
+    // Parse difficulty first so an invalid value aborts before anything else
+    // is read from kwargs.
+    if (parse_difficulty_id(kwargs, &env->difficulty_id) != 0) {
+        return -1;
+    }
+    // NOTE(review): unpack() comes from env_binding.h; presumably it returns
+    // the numeric value stored under the given key -- its behaviour for a
+    // missing key is not visible here, confirm against that header.
+    env->size = (int)unpack(kwargs, "size");
+    env->max_steps = (int)unpack(kwargs, "max_steps");
+    env->int_r_coeff = (float)unpack(kwargs, "int_r_coeff");
+    env->target_loss_pen_coeff = (float)unpack(kwargs, "target_loss_pen_coeff");
+    init(env);
+    return 0;
+}
+
+/*
+ * Binding hook: copy the fields of log into the Python dict under fixed keys.
+ * Always returns 0.
+ */
+static int my_log(PyObject* dict, Log* log) {
+    assign_to_dict(dict, "perf", log->perf);
+    assign_to_dict(dict, "score", log->score);
+    assign_to_dict(dict, "episode_return", log->episode_return);
+    assign_to_dict(dict, "episode_length", log->episode_length);
+    // The struct field is on_targets, but it is exported as "targets_hit".
+    assign_to_dict(dict, "targets_hit", log->on_targets);
+    return 0;
+}
diff --git a/pufferlib/ocean/boxoban/boxoban.c b/pufferlib/ocean/boxoban/boxoban.c
@@ -0,0 +1,194 @@
+/* Pure C demo file for Boxoban. Usage:
+ *   bash scripts/build_ocean.sh boxoban
+ *   ./boxoban [difficulty|path_to_bin]
+ *
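+ * With no argument the demo uses the "easy" maps. If you pass one of the
+ * known difficulty names (basic, easy, medium, hard, unfiltered) the demo
+ * looks for pufferlib/ocean/boxoban/boxoban_maps_<difficulty>.bin
+ * An argument containing '/' is treated as an explicit path to a bin file.
+ * Any other argument NAME resolves to
+ * pufferlib/ocean/boxoban/boxoban_maps_NAME.bin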
+ */
+
+#define BOXOBAN_MAPS_IMPLEMENTATION
+#include "boxoban.h"
+
+/* Return 1 when arg is exactly one of the built-in difficulty names, else 0. */
+static int is_named_difficulty(const char* arg) {
+    static const char* const known_names[] = {
+        "basic", "easy", "medium", "hard", "unfiltered"
+    };
+    const size_t name_count = sizeof(known_names) / sizeof(known_names[0]);
+
+    for (size_t i = 0; i < name_count; i++) {
+        if (strcmp(arg, known_names[i]) == 0) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Choose the map file for the demo from the command line.
+ *
+ *   no argument        -> "easy", via boxoban_prepare_maps_for_difficulty
+ *   argument with '/'  -> argv[1] returned unchanged as an explicit path
+ *   known difficulty   -> via boxoban_prepare_maps_for_difficulty
+ *   anything else      -> pufferlib/ocean/boxoban/boxoban_maps_<arg>.bin
+ *
+ * buffer (of buf_sz bytes) is scratch space for a resolved path. Returns
+ * either argv[1] or buffer, or NULL when the maps could not be prepared or
+ * the formatted path does not fit in buffer.
+ */
+static const char* resolve_map_path(int argc, char** argv, char* buffer, size_t buf_sz) {
+    const char* arg = argc > 1 ? argv[1] : NULL;
+    if (arg == NULL) {
+        if (boxoban_prepare_maps_for_difficulty("easy", buffer, buf_sz) != 0) {
+            return NULL;
+        }
+        return buffer;
+    }
+    if (strchr(arg, '/')) {
+        return arg;
+    }
+    if (is_named_difficulty(arg)) {
+        if (boxoban_prepare_maps_for_difficulty(arg, buffer, buf_sz) != 0) {
+            return NULL;
+        }
+        return buffer;
+    }
+    int written = snprintf(buffer, buf_sz, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", arg);
+    // snprintf reports the length it wanted to write; anything >= buf_sz means
+    // the path was cut short and would name a different (wrong) file.
+    if (written < 0 || (size_t)written >= buf_sz) {
+        return NULL;
+    }
+    return buffer;
+}
+
+
+/*
+ * Interactive demo: load the maps selected on the command line, then render
+ * the environment until the window is closed.
+ *
+ * While either Shift key is held the agent is driven manually with the arrow
+ * keys or WASD (no step is taken when no direction is held); otherwise a
+ * random action is taken every frame.
+ *
+ * Returns 0 on a normal exit and 1 when the map path could not be resolved or
+ * set, or when the environment buffers could not be allocated.
+ */
+int demo(int argc, char** argv) {
+    char path_buffer[512];
+    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
+    if (chosen_path == NULL) {
+        fprintf(stderr, "Failed to prepare map path\n");
+        return 1;
+    }
+    if (boxoban_set_map_path(chosen_path) != 0) {
+        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
+        return 1;
+    }
+
+    Boxoban env = {
+        .size = 10,
+        .observations = NULL,
+        .actions = NULL,
+        .rewards = NULL,
+        .terminals = NULL,
+        .max_steps = 500,
+        .int_r_coeff = 0.1f,
+        .target_loss_pen_coeff = 0.5f,
+        .tick = 0,
+        .agent_x = 0,
+        .agent_y = 0,
+        .intermediate_rewards = NULL,
+        .on_target = 0,
+        .n_boxes = 0,
+        .win = 0,
+        .difficulty_id = -1,
+        .client = NULL,
+        .n_targets = 0,
+    };
+
+    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
+    env.observations = calloc(obs_count, sizeof(unsigned char));
+    env.actions = calloc(1, sizeof(int));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(unsigned char));
+    if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
+        // Nothing has been initialised yet, so only the buffers need releasing.
+        fprintf(stderr, "Failed to allocate environment buffers\n");
+        free(env.observations);
+        free(env.actions);
+        free(env.rewards);
+        free(env.terminals);
+        return 1;
+    }
+
+    init(&env);
+    c_reset(&env);
+    c_render(&env);
+    while (!WindowShouldClose()) {
+        if (IsKeyPressed(KEY_LEFT_SHIFT) || IsKeyPressed(KEY_RIGHT_SHIFT)) {
+            TraceLog(LOG_INFO, "Shift key pressed");
+        }
+        bool manual = IsKeyDown(KEY_LEFT_SHIFT) || IsKeyDown(KEY_RIGHT_SHIFT);
+        if (manual) {
+            // When several directions are held the last check below wins.
+            int new_action = -1;
+            if (IsKeyDown(KEY_UP)    || IsKeyDown(KEY_W)) new_action = UP;
+            if (IsKeyDown(KEY_DOWN)  || IsKeyDown(KEY_S)) new_action = DOWN;
+            if (IsKeyDown(KEY_LEFT)  || IsKeyDown(KEY_A)) new_action = LEFT;
+            if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) new_action = RIGHT;
+
+            // Manual mode with no direction held: stay paused, only re-render.
+            if (new_action >= 0) {
+                env.actions[0] = new_action;
+                c_step(&env);
+            }
+        } else {
+            env.actions[0] = rand() % 5;
+            c_step(&env);
+        }
+        c_render(&env);
+    }
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+    return 0;
+}
+
+/*
+ * Headless throughput test: step the environment with random actions for
+ * roughly `timeout` seconds of wall-clock time and print the measured steps
+ * per second.
+ *
+ * Prints an error to stderr and returns early when the map path cannot be
+ * resolved or set, or when the environment buffers cannot be allocated.
+ */
+void test_performance(int argc, char** argv, int timeout) {
+    char path_buffer[512];
+    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
+    if (chosen_path == NULL) {
+        fprintf(stderr, "Failed to prepare map path\n");
+        return;
+    }
+    if (boxoban_set_map_path(chosen_path) != 0) {
+        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
+        return;
+    }
+    printf("Loaded map: %s\n", chosen_path);
+
+    Boxoban env = {
+        .size = 10,
+        .observations = NULL,
+        .actions = NULL,
+        .rewards = NULL,
+        .terminals = NULL,
+        .max_steps = 500,
+        .int_r_coeff = 0.1f,
+        .target_loss_pen_coeff = 0.5f,
+        .tick = 0,
+        .agent_x = 0,
+        .agent_y = 0,
+        .intermediate_rewards = NULL,
+        .on_target = 0,
+        .n_boxes = 0,
+        .win = 0,
+        .difficulty_id = -1,
+        .client = NULL,
+        .n_targets = 0,
+    };
+
+    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
+    env.observations = calloc(obs_count, sizeof(unsigned char));
+    env.actions = calloc(1, sizeof(int));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(unsigned char));
+    if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
+        // Nothing has been initialised yet, so only the buffers need releasing.
+        fprintf(stderr, "Failed to allocate environment buffers\n");
+        free(env.observations);
+        free(env.actions);
+        free(env.rewards);
+        free(env.terminals);
+        return;
+    }
+
+    printf("Initializing...\n");
+    init(&env);
+    printf("Resetting...\n");
+    c_reset(&env);
+    printf("Starting test...\n");
+
+    // Keep timestamps as time_t and measure with difftime so the elapsed time
+    // is not squeezed through int.
+    time_t start = time(NULL);
+    long num_steps = 0;
+    while (difftime(time(NULL), start) < (double)timeout) {
+        env.actions[0] = rand() % 5;
+        c_step(&env);
+        num_steps++;
+    }
+
+    double elapsed = difftime(time(NULL), start);
+    // Divide in floating point (the integer quotient used to be truncated),
+    // and report 0 rather than dividing by zero when no time has elapsed,
+    // e.g. for timeout <= 0.
+    double sps = elapsed > 0.0 ? (double)num_steps / elapsed : 0.0;
+    printf("Test Environment SPS: %f\n", sps);
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+}
+
+/*
+ * Entry point: run the interactive demo and exit with its status.
+ * Swap the demo call for the commented-out test_performance call to run the
+ * headless throughput test instead.
+ */
+int main(int argc, char** argv) {
+    // Set up unbuffered stdout and emit the startup trace before the demo
+    // runs, so they take effect while the program is actually working.
+    setbuf(stdout, NULL);
+    fprintf(stderr, "Entered main\n");
+    fflush(stderr);
+    //test_performance(argc, argv,10);
+    // Propagate failures (bad map path, allocation failure) to the shell.
+    return demo(argc, argv);
+}