Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
dba941d
inital
TBBristol Dec 1, 2025
319d30c
working demo
TBBristol Dec 2, 2025
10854dd
mmap changes, working on eval, gitignore maps files
TBBristol Dec 3, 2025
a7d1064
functioning as far as I can tell, no learning yet
TBBristol Dec 3, 2025
7b0b45b
running sweeps removed bptt sweep
TBBristol Dec 3, 2025
4c78c21
fixed clear logic in take action , added intermediated rewards
TBBristol Dec 4, 2025
9e4ffa8
intrweard into ini
TBBristol Dec 4, 2025
5b4159e
fixed get idx for puzzle to int32 to avoid wrap
TBBristol Dec 4, 2025
d69e3f9
generate maps tweaks
TBBristol Jan 1, 2026
60377d3
adding gen easy maps py
TBBristol Jan 1, 2026
854989f
gen maps make sure doesnt spawn at edghe, added int rew coeef 0.1 rem…
TBBristol Jan 1, 2026
aef6916
int targets logs
TBBristol Jan 1, 2026
bd15697
fied generate easy, penalty for box off
TBBristol Jan 1, 2026
9dfa988
map bin generation changes
TBBristol Jan 2, 2026
8714f61
Add boxoban-levels as submodule
TBBristol Jan 2, 2026
849edc5
dash diff, basic maps
TBBristol Jan 2, 2026
b28c7cf
levels add
TBBristol Jan 2, 2026
29ba0ec
demp working with difficulty etc
TBBristol Jan 2, 2026
1277ea2
custom cnn
TBBristol Jan 2, 2026
705632b
tidy and readme
TBBristol Jan 2, 2026
9f7a212
Add image to README for boxoban
TBBristol Jan 2, 2026
d963ab3
Fix game name in README
TBBristol Jan 2, 2026
60b7ef1
Fix formatting in README.md for Boxoban game
TBBristol Jan 2, 2026
1f39635
Update README with Easy difficulty level details
TBBristol Jan 2, 2026
40e418d
Vendor boxoban-levels (remove submodule)
TBBristol Jan 2, 2026
a87ba60
Merge branch 'boxoban' of https://github.com/TBBristol/PufferLib into…
TBBristol Jan 2, 2026
6c9a246
Update README to include .bin generation info
TBBristol Jan 2, 2026
18eb05b
Fix formatting of header in README.md
TBBristol Jan 2, 2026
1d4b79b
Update README with manual play instructions
TBBristol Jan 2, 2026
9870854
Update README with gameplay instructions
TBBristol Jan 2, 2026
e594ea6
Delete boxoban.dSYM directory
TBBristol Jan 2, 2026
4c3a150
resotre default ini
TBBristol Jan 2, 2026
cdc0304
Ignore dSYM artifacts
TBBristol Jan 2, 2026
8d53c46
map generation bug fix
TBBristol Jan 2, 2026
e52f2e9
training basic
TBBristol Jan 28, 2026
db5fd21
perf changes
Jan 28, 2026
8d66c6c
perf fixed?
Jan 28, 2026
ea21741
trained basic
Jan 28, 2026
5d6c300
fix map parse bug agent ontarget
Jan 29, 2026
b543745
embedds
TBBristol Jan 31, 2026
96d9401
added len coeff in ini
Feb 1, 2026
f56c243
ini
Feb 1, 2026
3b16c62
fps change
Feb 1, 2026
666b5da
ignore map bins
Feb 3, 2026
305ae9a
parse maps to bin print fix
Feb 3, 2026
199662d
getting profiling to work in .c
Feb 3, 2026
5d542a6
little tidying
Feb 3, 2026
af7f2d2
Delete pufferlib/ocean/boxoban/boxoban-levels/CONTRIBUTING.md
TBBristol Feb 3, 2026
e03e281
del lisence extra
Feb 3, 2026
eca845c
add map bins to ignore
Feb 3, 2026
932a983
move model
Feb 3, 2026
8193a4d
restor original models and default ini
Feb 3, 2026
f577c3c
Delete .gitmodules
TBBristol Feb 3, 2026
e6873d3
pufferl revert
Feb 3, 2026
dbed367
Merge branch 'PufferAI:3.0' into boxoban
TBBristol Feb 3, 2026
6876842
revrrt breakout.c
Feb 3, 2026
0d8f87c
Remove path reference from resources file
TBBristol Feb 4, 2026
bfee219
map generation changes and some dels
TBBristol Feb 5, 2026
50a6dff
del levels
TBBristol Feb 5, 2026
73db6f3
update readme
TBBristol Feb 5, 2026
ce66080
.env intiialisation in .c file and remove length penaty
Mar 5, 2026
57d532c
allocation changes
TBBristol Mar 5, 2026
6159778
removed OBS macro and replaced with sset and get entity inlines
Mar 5, 2026
5c12edc
removed int reward macro
Mar 5, 2026
772c2dc
removed loops from ebveryitng except reset
Mar 5, 2026
0c800c0
na
Mar 5, 2026
67119d2
generated sprites
TBBristol Mar 5, 2026
36e44f4
del redundant lisence
TBBristol Mar 5, 2026
3a8ca5b
move to one loop on reset
TBBristol Mar 7, 2026
6ad9a96
code clarity
TBBristol Mar 7, 2026
9966376
int not unsugned char on intr
TBBristol Mar 7, 2026
5c34a4c
use metadata in map files rather that loop
TBBristol Mar 7, 2026
47df5bc
int initialisation
Mar 7, 2026
aca9e99
parse maps checks
TBBristol Mar 7, 2026
f963655
intr sizing fix
Mar 7, 2026
2a55699
make take action simpler
Mar 7, 2026
c623c71
default policy sweep
Mar 7, 2026
fe42393
c maps
TBBristol Mar 8, 2026
463aa09
adding parsing
TBBristol Mar 8, 2026
bbe7678
deleteing unused py
Mar 14, 2026
42faf7e
remove old py matching rng
Mar 14, 2026
9f26532
move maps stuff out of boxoban.h
Mar 14, 2026
857402b
imports
Mar 14, 2026
90793da
formatting
Mar 14, 2026
c6e8e43
remove readme
Mar 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
boxoban_maps_*.bin

# Annoying temp files generated by Cython
c_*.c
pufferlib/extensions.c
Expand Down Expand Up @@ -162,3 +164,5 @@ pufferlib/ocean/impulse_wars/*-release/
pufferlib/ocean/impulse_wars/debug-*/
pufferlib/ocean/impulse_wars/release-*/
pufferlib/ocean/impulse_wars/benchmark/

*.dSYM/
57 changes: 57 additions & 0 deletions pufferlib/config/ocean/boxoban.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
[base]
package = ocean
env_name = puffer_boxoban
policy_name = Policy
rnn_name = Recurrent

[vec]
num_envs = 2

[env]
num_envs = 1024
#0 basic, 1 easy, 2 medium, 3 hard, 4 unfiltered
difficulty = 1
#reward per intermediate target (once per episode)
int_r_coeff = 0.25
#penalty coefficient applied when a box is moved off a target
target_loss_pen_coeff = 0.0

[policy]


[train]

#EASY
adam_beta1 = 0.8731132476489148
adam_beta2 = 0.97965686417704
adam_eps = 0.00000000008123794869
anneal_lr = "true"
batch_size = "auto"
bptt_horizon = 64
clip_coef = 0.01
ent_coef = 0.01595981947421829
gae_lambda = 0.6982154990440731
gamma = 0.98663093763856
learning_rate = 0.03199264297422195
max_grad_norm = 0.5768091592872416
max_minibatch_size = 32768
min_lr_ratio = 0.37872027027338984
minibatch_size = 8192
optimizer = "muon"
precision = "float32"
prio_alpha = 0.99
prio_beta0 = 0.930949266538068
total_timesteps = 82565313
update_epochs = 1
use_rnn = true
vf_clip_coef = 2.9197817585307435
vf_coef = 4.787362674459031
vtrace_c_clip = 4.90924508575585
vtrace_rho_clip = 4.073806432722373


[sweep.train.minibatch_size]
distribution = uniform_pow2
min = 4096
max = 32768
scale = auto
66 changes: 66 additions & 0 deletions pufferlib/ocean/boxoban/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#define BOXOBAN_MAPS_IMPLEMENTATION //enables mmap
#include "boxoban.h"
#define Env Boxoban
#include "../env_binding.h"

/*
 * Reads the optional "difficulty" entry from kwargs and converts it to a
 * difficulty id.
 *
 * The value may be a Python int (validated through
 * boxoban_difficulty_name_from_id) or a Python str (resolved through
 * boxoban_difficulty_id_from_name). When the key is absent the id is 0.
 *
 * Returns 0 and stores the id in *out_difficulty_id on success.
 * Returns -1 with a Python exception set on failure; *out_difficulty_id is
 * left untouched in that case.
 */
static int parse_difficulty_id(PyObject* kwargs, int* out_difficulty_id) {
    int difficulty_id = 0;
    PyObject* difficulty_obj = PyDict_GetItemString(kwargs, "difficulty");
    if (difficulty_obj != NULL) {
        if (PyLong_Check(difficulty_obj)) {
            long parsed_id = PyLong_AsLong(difficulty_obj);
            if (parsed_id == -1 && PyErr_Occurred()) {
                /* The int does not fit in a C long; keep the exception
                 * raised by PyLong_AsLong instead of masking it. */
                return -1;
            }
            /* Range-check before narrowing so a huge value cannot wrap
             * around to an id that happens to be valid. */
            if (parsed_id < INT_MIN || parsed_id > INT_MAX ||
                boxoban_difficulty_name_from_id((int)parsed_id) == NULL) {
                PyErr_Format(
                    PyExc_ValueError,
                    "Boxoban 'difficulty' int must be in [0, 4], got %ld (0=basic, 1=easy, 2=medium, 3=hard, 4=unfiltered)",
                    parsed_id
                );
                return -1;
            }
            difficulty_id = (int)parsed_id;
        } else if (PyUnicode_Check(difficulty_obj)) {
            const char* difficulty_name = PyUnicode_AsUTF8(difficulty_obj);
            if (difficulty_name == NULL) {
                /* PyUnicode_AsUTF8 has already set an exception. */
                return -1;
            }
            difficulty_id = boxoban_difficulty_id_from_name(difficulty_name);
            if (difficulty_id < 0) {
                PyErr_Format(
                    PyExc_ValueError,
                    "Boxoban 'difficulty' string must be one of: basic, easy, medium, hard, unfiltered (got '%s')",
                    difficulty_name
                );
                return -1;
            }
        } else {
            PyErr_SetString(
                PyExc_TypeError,
                "Boxoban 'difficulty' must be an int (0..4) or string (basic/easy/medium/hard/unfiltered)"
            );
            return -1;
        }
    }
    *out_difficulty_id = difficulty_id;
    return 0;
}

/*
 * Binding hook that fills in a Boxoban env from the Python kwargs and then
 * calls init() on it.
 *
 * The difficulty is parsed first; if that fails, -1 is returned before any
 * other field is touched. Otherwise "size", "max_steps", "int_r_coeff" and
 * "target_loss_pen_coeff" are read through unpack() and cast to the field
 * types, init(env) is called, and 0 is returned.
 * NOTE(review): unpack() is defined outside this file; its behavior for a
 * missing key is not checked here - confirm against env_binding.h.
 */
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
    const int difficulty_status = parse_difficulty_id(kwargs, &env->difficulty_id);
    if (difficulty_status != 0) {
        return -1;
    }

    /* Integer-valued settings. */
    env->size = (int)unpack(kwargs, "size");
    env->max_steps = (int)unpack(kwargs, "max_steps");

    /* Reward-shaping coefficients. */
    env->int_r_coeff = (float)unpack(kwargs, "int_r_coeff");
    env->target_loss_pen_coeff = (float)unpack(kwargs, "target_loss_pen_coeff");

    init(env);
    return 0;
}

/*
 * Binding hook that passes selected Log fields to assign_to_dict() under
 * fixed string keys. Always returns 0.
 * NOTE(review): assign_to_dict() is defined outside this file; presumably it
 * stores the value in the Python dict under the given key - confirm.
 */
static int my_log(PyObject* dict, Log* log) {
assign_to_dict(dict, "perf", log->perf);
assign_to_dict(dict, "score", log->score);
assign_to_dict(dict, "episode_return", log->episode_return);
assign_to_dict(dict, "episode_length", log->episode_length);
/* The exported key is "targets_hit" although the field is named on_targets. */
assign_to_dict(dict, "targets_hit", log->on_targets);
return 0;
}
194 changes: 194 additions & 0 deletions pufferlib/ocean/boxoban/boxoban.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/* Pure C demo file for Boxoban. Usage:
* bash scripts/build_ocean.sh boxoban
* ./boxoban [difficulty|path_to_bin]
*
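* With no argument the demo uses the "easy" maps. If you pass one of the
* known difficulty names (basic, easy, medium, hard, unfiltered) the demo
* looks for pufferlib/ocean/boxoban/boxoban_maps_<difficulty>.bin.
* An argument containing '/' is treated as an explicit path to a bin file;
* any other argument <name> maps to pufferlib/ocean/boxoban/boxoban_maps_<name>.bin.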
*/

#define BOXOBAN_MAPS_IMPLEMENTATION
#include "boxoban.h"

/*
 * Returns 1 when arg is exactly one of the built-in difficulty names
 * ("basic", "easy", "medium", "hard", "unfiltered"), otherwise 0.
 * The comparison is case-sensitive; arg must not be NULL.
 */
static int is_named_difficulty(const char* arg) {
    static const char* const known_names[] = {
        "basic", "easy", "medium", "hard", "unfiltered"
    };
    const size_t known_count = sizeof(known_names) / sizeof(known_names[0]);

    for (size_t i = 0; i < known_count; i++) {
        if (strcmp(arg, known_names[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Chooses the map file path from the command line.
 *
 *   - no argument:            prepare the maps for "easy" into buffer
 *   - argument containing '/': returned as-is (explicit path, buffer unused)
 *   - known difficulty name:  prepare the maps for that difficulty into buffer
 *   - anything else:          pufferlib/ocean/boxoban/boxoban_maps_<arg>.bin
 *
 * buffer/buf_sz is caller-provided scratch space that must outlive the
 * returned pointer. Returns NULL when boxoban_prepare_maps_for_difficulty()
 * reports failure or when the formatted path does not fit in buffer.
 */
static const char* resolve_map_path(int argc, char** argv, char* buffer, size_t buf_sz) {
    const char* arg = argc > 1 ? argv[1] : NULL;
    const char* difficulty = NULL;

    if (arg == NULL) {
        difficulty = "easy";
    } else if (strchr(arg, '/')) {
        return arg;
    } else if (is_named_difficulty(arg)) {
        difficulty = arg;
    }

    if (difficulty != NULL) {
        if (boxoban_prepare_maps_for_difficulty(difficulty, buffer, buf_sz) != 0) {
            return NULL;
        }
        return buffer;
    }

    /* Unknown bare name: build the conventional file name, and refuse to
     * hand back a truncated path that would point at the wrong file. */
    int written = snprintf(buffer, buf_sz, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", arg);
    if (written < 0 || (size_t)written >= buf_sz) {
        return NULL;
    }
    return buffer;
}


/*
 * Interactive demo: resolves and registers the map file, builds one Boxoban
 * env with locally allocated buffers, then steps and renders it until the
 * window is closed.
 *
 * While either Shift key is held the env is only stepped when a direction
 * key (arrows or WASD) is down; otherwise a random action in [0, 4] is
 * taken every frame.
 *
 * Returns 0 on a normal exit and 1 when the map path cannot be prepared or
 * set, or when a buffer allocation fails.
 */
int demo(int argc, char** argv) {
    char path_buffer[512];
    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
    if (chosen_path == NULL) {
        fprintf(stderr, "Failed to prepare map path\n");
        return 1;
    }
    if (boxoban_set_map_path(chosen_path) != 0) {
        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
        return 1;
    }

    Boxoban env = {
        .size = 10,
        .observations = NULL,
        .actions = NULL,
        .rewards = NULL,
        .terminals = NULL,
        .max_steps = 500,
        .int_r_coeff = 0.1f,
        .target_loss_pen_coeff = 0.5f,
        .tick = 0,
        .agent_x = 0,
        .agent_y = 0,
        .intermediate_rewards = NULL,
        .on_target = 0,
        .n_boxes = 0,
        .win = 0,
        .difficulty_id = -1,
        .client = NULL,
        .n_targets = 0,
    };

    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
    env.observations = calloc(obs_count, sizeof(unsigned char));
    env.actions = calloc(1, sizeof(int));
    env.rewards = calloc(1, sizeof(float));
    env.terminals = calloc(1, sizeof(unsigned char));
    if (env.observations == NULL || env.actions == NULL ||
        env.rewards == NULL || env.terminals == NULL) {
        /* Bail out before init()/c_step() can touch a NULL buffer. */
        fprintf(stderr, "Failed to allocate environment buffers\n");
        free(env.observations);
        free(env.actions);
        free(env.rewards);
        free(env.terminals);
        return 1;
    }

    init(&env);
    c_reset(&env);
    c_render(&env);
    while (!WindowShouldClose()) {
        if (IsKeyPressed(KEY_LEFT_SHIFT) || IsKeyPressed(KEY_RIGHT_SHIFT)) {
            TraceLog(LOG_INFO, "Shift key pressed");
        }
        bool manual = IsKeyDown(KEY_LEFT_SHIFT) || IsKeyDown(KEY_RIGHT_SHIFT);
        if (manual) {
            /* Later checks override earlier ones when several keys are held. */
            int new_action = -1;
            if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) new_action = UP;
            if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) new_action = DOWN;
            if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) new_action = LEFT;
            if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) new_action = RIGHT;

            /* Manual mode with no direction held: stay paused (no step). */
            if (new_action >= 0) {
                env.actions[0] = new_action;
                c_step(&env);
            }
        } else {
            env.actions[0] = rand() % 5;
            c_step(&env);
        }
        c_render(&env);
    }
    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    c_close(&env);
    return 0;
}

/*
 * Headless throughput benchmark: resolves and registers the map file, builds
 * one Boxoban env, then takes random actions in [0, 4] for roughly `timeout`
 * seconds of wall-clock time and prints the measured steps per second.
 *
 * Timing uses time(), so the resolution is whole seconds. When no time
 * elapsed (e.g. timeout <= 0) the reported SPS is 0 rather than dividing by
 * zero. Returns early, after printing to stderr, when the map path cannot be
 * prepared or set, or when a buffer allocation fails.
 */
void test_performance(int argc, char** argv, int timeout) {
    char path_buffer[512];
    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
    if (chosen_path == NULL) {
        fprintf(stderr, "Failed to prepare map path\n");
        return;
    }
    if (boxoban_set_map_path(chosen_path) != 0) {
        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
        return;
    }
    printf("Loaded map: %s\n", chosen_path);

    Boxoban env = {
        .size = 10,
        .observations = NULL,
        .actions = NULL,
        .rewards = NULL,
        .terminals = NULL,
        .max_steps = 500,
        .int_r_coeff = 0.1f,
        .target_loss_pen_coeff = 0.5f,
        .tick = 0,
        .agent_x = 0,
        .agent_y = 0,
        .intermediate_rewards = NULL,
        .on_target = 0,
        .n_boxes = 0,
        .win = 0,
        .difficulty_id = -1,
        .client = NULL,
        .n_targets = 0,
    };

    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
    env.observations = calloc(obs_count, sizeof(unsigned char));
    env.actions = calloc(1, sizeof(int));
    env.rewards = calloc(1, sizeof(float));
    env.terminals = calloc(1, sizeof(unsigned char));
    if (env.observations == NULL || env.actions == NULL ||
        env.rewards == NULL || env.terminals == NULL) {
        /* Bail out before init()/c_step() can touch a NULL buffer. */
        fprintf(stderr, "Failed to allocate environment buffers\n");
        free(env.observations);
        free(env.actions);
        free(env.rewards);
        free(env.terminals);
        return;
    }

    printf("Initializing...\n");
    init(&env);
    printf("Resetting...\n");
    c_reset(&env);
    printf("Starting test...\n");

    /* Keep timestamps as time_t; narrowing them to int can truncate. */
    time_t start = time(NULL);
    long long num_steps = 0;
    while (difftime(time(NULL), start) < (double)timeout) {
        env.actions[0] = rand() % 5;
        c_step(&env);
        num_steps++;
    }

    double elapsed = difftime(time(NULL), start);
    /* Floating-point division; guard the zero-elapsed case. */
    double sps = elapsed > 0.0 ? (double)num_steps / elapsed : 0.0;
    printf("Test Environment SPS: %f\n", sps);
    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    c_close(&env);
}

/*
 * Program entry point: runs the interactive demo and exits with its status
 * (0 on success, 1 on a setup failure).
 *
 * To benchmark instead of rendering, replace the demo() call with
 * test_performance(argc, argv, 10).
 */
int main(int argc, char** argv) {
    /* setbuf must run before any other operation on stdout, so do it first. */
    setbuf(stdout, NULL);
    fprintf(stderr, "Entered main\n");
    fflush(stderr);
    return demo(argc, argv);
}
Loading
Loading