diff --git a/.gitignore b/.gitignore index f9082380e0..b479f86052 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +boxoban_maps_*.bin + # Annoying temp files generated by Cython c_*.c pufferlib/extensions.c @@ -162,3 +164,5 @@ pufferlib/ocean/impulse_wars/*-release/ pufferlib/ocean/impulse_wars/debug-*/ pufferlib/ocean/impulse_wars/release-*/ pufferlib/ocean/impulse_wars/benchmark/ + +*.dSYM/ diff --git a/pufferlib/config/ocean/boxoban.ini b/pufferlib/config/ocean/boxoban.ini new file mode 100644 index 0000000000..f2e912d19c --- /dev/null +++ b/pufferlib/config/ocean/boxoban.ini @@ -0,0 +1,57 @@ +[base] +package = ocean +env_name = puffer_boxoban +policy_name = Policy +rnn_name = Recurrent + +[vec] +num_envs = 2 + +[env] +num_envs = 1024 +#0 basic, 1 easy, 2 medium, 3 hard, 4 unfiltered +difficulty = 1 +#reward per intermediate target (once per episode) +int_r_coeff = 0.25 +#moving box off target +target_loss_pen_coeff = 0.0 + +[policy] + + +[train] + +#EASY +adam_beta1 = 0.8731132476489148 +adam_beta2 = 0.97965686417704 +adam_eps = 0.00000000008123794869 +anneal_lr = "true" +batch_size = "auto" +bptt_horizon = 64 +clip_coef = 0.01 +ent_coef = 0.01595981947421829 +gae_lambda = 0.6982154990440731 +gamma = 0.98663093763856 +learning_rate = 0.03199264297422195 +max_grad_norm = 0.5768091592872416 +max_minibatch_size = 32768 +min_lr_ratio = 0.37872027027338984 +minibatch_size = 8192 +optimizer = "muon" +precision = "float32" +prio_alpha = 0.99 +prio_beta0 = 0.930949266538068 +total_timesteps = 82565313 +update_epochs = 1 +use_rnn = true +vf_clip_coef = 2.9197817585307435 +vf_coef = 4.787362674459031 +vtrace_c_clip = 4.90924508575585 +vtrace_rho_clip = 4.073806432722373 + + +[sweep.train.minibatch_size] +distribution = uniform_pow2 +min = 4096 +max = 32768 +scale = auto diff --git a/pufferlib/ocean/boxoban/binding.c b/pufferlib/ocean/boxoban/binding.c new file mode 100644 index 0000000000..6d0749cc10 --- /dev/null +++ b/pufferlib/ocean/boxoban/binding.c @@ -0,0 
+1,76 @@
+#define BOXOBAN_MAPS_IMPLEMENTATION //enables mmap
+#include "boxoban.h"
+#define Env Boxoban
+#include "../env_binding.h"
+
+// Read the optional "difficulty" kwarg into *out_difficulty_id.
+// Accepts an int id (0..4) or one of the difficulty names; a missing key
+// defaults to 0 (basic). Returns 0 on success, or -1 with a Python
+// exception set when the value is rejected.
+static int parse_difficulty_id(PyObject* kwargs, int* out_difficulty_id) {
+    int difficulty_id = 0;
+    PyObject* difficulty_obj = PyDict_GetItemString(kwargs, "difficulty");
+    if (difficulty_obj != NULL) {
+        if (PyLong_Check(difficulty_obj)) {
+            long parsed_id = PyLong_AsLong(difficulty_obj);
+            // Range-check the long before narrowing it: a plain (int) cast
+            // would wrap values such as 1L << 32 onto a valid id.
+            if (parsed_id < 0 || parsed_id > 4 ||
+                boxoban_difficulty_name_from_id((int)parsed_id) == NULL) {
+                PyErr_Format(
+                    PyExc_ValueError,
+                    "Boxoban 'difficulty' int must be in [0, 4], got %ld (0=basic, 1=easy, 2=medium, 3=hard, 4=unfiltered)",
+                    parsed_id
+                );
+                return -1;
+            }
+            difficulty_id = (int)parsed_id;
+        } else if (PyUnicode_Check(difficulty_obj)) {
+            const char* difficulty_name = PyUnicode_AsUTF8(difficulty_obj);
+            if (difficulty_name == NULL) {
+                return -1;
+            }
+            difficulty_id = boxoban_difficulty_id_from_name(difficulty_name);
+            if (difficulty_id < 0) {
+                PyErr_Format(
+                    PyExc_ValueError,
+                    "Boxoban 'difficulty' string must be one of: basic, easy, medium, hard, unfiltered (got '%s')",
+                    difficulty_name
+                );
+                return -1;
+            }
+        } else {
+            PyErr_SetString(
+                PyExc_TypeError,
+                "Boxoban 'difficulty' must be an int (0..4) or string (basic/easy/medium/hard/unfiltered)"
+            );
+            return -1;
+        }
+    }
+    *out_difficulty_id = difficulty_id;
+    return 0;
+}
+
+// Copy the constructor kwargs into the env, then run init() on it.
+// Returns -1 if the difficulty kwarg is invalid.
+static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
+    if (parse_difficulty_id(kwargs, &env->difficulty_id) != 0) {
+        return -1;
+    }
+    env->size = (int)unpack(kwargs, "size");
+    env->max_steps = (int)unpack(kwargs, "max_steps");
+    env->int_r_coeff = (float)unpack(kwargs, "int_r_coeff");
+    env->target_loss_pen_coeff = (float)unpack(kwargs, "target_loss_pen_coeff");
+    init(env);
+    return 0;
+}
+
+// Export the Log fields to the dict; on_targets is published as "targets_hit".
+static int my_log(PyObject* dict, Log* log) {
+    assign_to_dict(dict, "perf", log->perf);
+    assign_to_dict(dict, "score", log->score);
+    assign_to_dict(dict, "episode_return", log->episode_return);
+    assign_to_dict(dict, "episode_length", log->episode_length);
+    assign_to_dict(dict, "targets_hit", log->on_targets);
+    return 0;
+}
diff --git a/pufferlib/ocean/boxoban/boxoban.c b/pufferlib/ocean/boxoban/boxoban.c
new file mode 100644
index 0000000000..b6a9a1d21b
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban.c
@@ -0,0 +1,201 @@
+/* Pure C demo file for Boxoban. Usage:
+ * bash scripts/build_ocean.sh boxoban
+ * ./boxoban [difficulty|path_to_bin]
+ *
+ * If you pass one of the known difficulty names (basic, easy, medium,
+ * hard, unfiltered) the demo looks for pufferlib/ocean/boxoban/boxoban_maps_<difficulty>.bin
+ * Otherwise the argument is treated as an explicit path to a bin file.
+ */
+
+#define BOXOBAN_MAPS_IMPLEMENTATION
+#include "boxoban.h"
+
+// Non-zero when arg is one of the five built-in difficulty names
+static int is_named_difficulty(const char* arg) {
+    return strcmp(arg, "basic") == 0 ||
+        strcmp(arg, "easy") == 0 ||
+        strcmp(arg, "medium") == 0 ||
+        strcmp(arg, "hard") == 0 ||
+        strcmp(arg, "unfiltered") == 0;
+}
+
+// Pick the map bin for the demo: no argument prepares the "easy" set, an
+// argument containing '/' is used verbatim as a path, a difficulty name is
+// prepared on demand, and anything else is formatted into
+// pufferlib/ocean/boxoban/boxoban_maps_<arg>.bin inside buffer.
+// Returns NULL when preparing a named difficulty fails.
+static const char* resolve_map_path(int argc, char** argv, char* buffer, size_t buf_sz) {
+    const char* arg = argc > 1 ? argv[1] : NULL;
+    if (arg == NULL) {
+        if (boxoban_prepare_maps_for_difficulty("easy", buffer, buf_sz) != 0) {
+            return NULL;
+        }
+        return buffer;
+    }
+    if (strchr(arg, '/')) {
+        return arg;
+    }
+    if (is_named_difficulty(arg)) {
+        if (boxoban_prepare_maps_for_difficulty(arg, buffer, buf_sz) != 0) {
+            return NULL;
+        }
+        return buffer;
+    }
+    snprintf(buffer, buf_sz, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", arg);
+    return buffer;
+}
+
+
+// Interactive viewer: takes a random action each frame, or arrow/WASD
+// actions while a Shift key is held. Returns 1 if the map cannot be set up.
+int demo(int argc, char** argv) {
+    char path_buffer[512];
+    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
+    if (chosen_path == NULL) {
+        fprintf(stderr, "Failed to prepare map path\n");
+        return 1;
+    }
+    if (boxoban_set_map_path(chosen_path) != 0) {
+        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
+        return 1;
+    }
+
+    Boxoban env = {
+        .size = 10,
+        .observations = NULL,
+        .actions = NULL,
+        .rewards = NULL,
+        .terminals = NULL,
+        .max_steps = 500,
+        .int_r_coeff = 0.1f,
+        .target_loss_pen_coeff = 0.5f,
+        .tick = 0,
+        .agent_x = 0,
+        .agent_y = 0,
+        .intermediate_rewards = NULL,
+        .on_target = 0,
+        .n_boxes = 0,
+        .win = 0,
+        .difficulty_id = -1,
+        .client = NULL,
+        .n_targets = 0,
+
+    };
+
+    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
+    env.observations = calloc(obs_count, sizeof(unsigned char));
+    env.actions = calloc(1, sizeof(int));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(unsigned char));
+
+    init(&env);
+    c_reset(&env);
+    c_render(&env);
+    while (!WindowShouldClose()) {
+        if (IsKeyPressed(KEY_LEFT_SHIFT) || IsKeyPressed(KEY_RIGHT_SHIFT)) {
+            TraceLog(LOG_INFO, "Shift key pressed");
+        }
+        bool manual = IsKeyDown(KEY_LEFT_SHIFT) || IsKeyDown(KEY_RIGHT_SHIFT);
+        if (manual) {
+            int new_action = -1;
+            if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) new_action = UP;
+            if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) new_action = DOWN;
+            if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) new_action = LEFT;
+            if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) new_action = RIGHT;
+
+            // No direction held: skip the step so the game stays paused
+            if (new_action >= 0) {
+                env.actions[0] = new_action;
+                c_step(&env);
+            }
+        } else {
+            env.actions[0] = rand() % 5;
+            c_step(&env);
+        }
+
+        c_render(&env);
+    }
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+    return 0;
+}
+
+// Step the env with random actions for roughly `timeout` seconds and print
+// the measured steps per second.
+void test_performance(int argc, char** argv, int timeout) {
+    char path_buffer[512];
+    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
+    if (chosen_path == NULL) {
+        fprintf(stderr, "Failed to prepare map path\n");
+        return;
+    }
+    if (boxoban_set_map_path(chosen_path) != 0) {
+        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
+        return;
+    }
+    printf("Loaded map: %s\n", chosen_path);
+
+    Boxoban env = {
+        .size = 10,
+        .observations = NULL,
+        .actions = NULL,
+        .rewards = NULL,
+        .terminals = NULL,
+        .max_steps = 500,
+        .int_r_coeff = 0.1f,
+        .target_loss_pen_coeff = 0.5f,
+        .tick = 0,
+        .agent_x = 0,
+        .agent_y = 0,
+        .intermediate_rewards = NULL,
+        .on_target = 0,
+        .n_boxes = 0,
+        .win = 0,
+        .difficulty_id = -1,
+        .client = NULL,
+        .n_targets = 0,
+    };
+
+    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
+    env.observations = calloc(obs_count, sizeof(unsigned char));
+    env.actions = calloc(1, sizeof(int));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(unsigned char));
+
+    printf("Initializing...\n");
+    init(&env);
+    printf("Resetting...\n");
+    c_reset(&env);
+    printf("Starting test...\n");
+
+    time_t start = time(NULL);
+    int num_steps = 0;
+    while (time(NULL) - start < timeout) {
+        env.actions[0] = rand() % 5;
+        c_step(&env);
+        num_steps++;
+    }
+
+    time_t end = time(NULL);
+    // Divide in floating point and guard against a zero-second run
+    long elapsed = (long)(end - start);
+    float sps = elapsed > 0 ? (float)num_steps / (float)elapsed : 0.0f;
+    printf("Test Environment SPS: %f\n", sps);
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+}
+
+// Entry point: runs the interactive demo and returns its exit status
+int main(int argc, char** argv) {
+    setbuf(stdout, NULL);
+    fprintf(stderr, "Entered main\n");
+    fflush(stderr);
+    //test_performance(argc, argv,10);
+    return demo(argc, argv);
+}
diff --git a/pufferlib/ocean/boxoban/boxoban.h b/pufferlib/ocean/boxoban/boxoban.h
new file mode 100644
index 0000000000..6e483ba9e2
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban.h
@@ -0,0 +1,461 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "raylib.h"
+#include "boxoban_maps.h"
+
+// Action ids read from env->actions[0]
+const unsigned char NOOP = 0;
+const unsigned char DOWN = 1;
+const unsigned char UP = 2;
+const unsigned char LEFT = 3;
+const unsigned char RIGHT = 4;
+
+// Plane indices into the observation buffer
+const unsigned char AGENT = 0;
+const unsigned char WALLS = 1;
+const unsigned char BOXES = 2;
+const unsigned char TARGET = 3;
+
+// Required struct. Only use floats!
+typedef struct {
+    float perf; // Recommended 0-1 normalized single real number perf metric
+    float score; // Recommended unnormalized single real number perf metric
+    float episode_return; // Recommended metric: sum of agent rewards over episode
+    float episode_length; // Recommended metric: number of steps of agent episode
+    // Any extra fields you add here may be exported to Python in binding.c
+    float on_targets; // Number of targets currently boxed
+    float n; // Required as the last field
+} Log;
+
+// Textures used by the raylib renderer
+typedef struct {
+    Texture2D wall;
+    Texture2D box;
+    Texture2D target;
+    Texture2D floor;
+    Texture2D agent;
+    Texture2D box_on_target;
+} Client;
+
+// Required that you have some struct for your env
+// Recommended that you name it the same as the env file
+typedef struct {
+    Log log; // Required field. Env binding code uses this to aggregate logs
+    unsigned char* observations; // Required. You can use any obs type, but make sure it matches in Python!
+    int* actions; // Required. int* for discrete/multidiscrete, float* for box
+    float* rewards; // Required
+    unsigned char* terminals; // Required. Truncations are not standard yet
+    int size; // Grid side length; observations hold 4 planes of size*size
+    int tick; // Steps taken in the current episode
+    int max_steps; // Episode ends with a -1 reward once tick reaches this
+    int agent_x;
+    int agent_y;
+    unsigned char* intermediate_rewards; // size*size flags: 1 where a target's one-shot reward is still available
+    float int_r_coeff;
+    float target_loss_pen_coeff;
+    int on_target; //num targets currently boxed
+    int n_boxes; //boxes in map
+    int n_targets; //targets in map
+    int difficulty_id; // 0=basic,1=easy,2=medium,3=hard,4=unfiltered
+    Client* client; // Render state, created on the first c_render call
+    int win; // 1 once every target is covered in this episode
+    float episode_return; // Running sum of rewards in the current episode
+} Boxoban;
+
+void ensure_map_loaded(void);
+
+// Prepare and select the map bin matching env->difficulty_id.
+// An id of -1 keeps whatever map path is already configured.
+// Returns 0 on success and -1 for an invalid id or a failed preparation.
+static int boxoban_configure_maps_from_env(Boxoban* env) {
+    if (env->difficulty_id == -1) {
+        return 0;
+    }
+
+    if (env->difficulty_id < -1) {
+        fprintf(stderr, "Invalid Boxoban difficulty id %d\n", env->difficulty_id);
+        return -1;
+    }
+
+    const char* difficulty_name = boxoban_difficulty_name_from_id(env->difficulty_id);
+    if (difficulty_name == NULL) {
+        fprintf(stderr, "Invalid Boxoban difficulty id %d\n", env->difficulty_id);
+        return -1;
+    }
+    char prepared_path[512];
+    if (boxoban_prepare_maps_for_difficulty(difficulty_name, prepared_path, sizeof(prepared_path)) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+// Cells are addressed as (entity, x, y); y grows from top to bottom
+
+static inline void set_entity(Boxoban *env, int entity, int x, int y, unsigned char value) {
+    env->observations[(entity)*env->size*env->size + (y)*env->size + (x)] = value;
+}
+
+static inline unsigned char get_entity(Boxoban *env, int entity, int x, int y) {
+    return env->observations[(entity)*env->size*env->size + (y)*env->size + (x)];
+}
+
+static inline void set_intermediate_reward(Boxoban *env, int x, int y, unsigned char value) {
+    env->intermediate_rewards[(y)*env->size + (x)] = value;
+}
+
+static inline unsigned char get_intermediate_reward_status(Boxoban *env, int x, int y) {
+    return env->intermediate_rewards[(y)*env->size + (x)];
+}
+
+// Pseudo-random puzzle index in [0, PUZZLE_COUNT), drawn with rand()
+static inline uint32_t get_random_puzzle_idx(const Boxoban *env) {
+    (void)env;
+    return (uint32_t)((size_t)rand() % PUZZLE_COUNT);
+}
+
+
+// Load the configured maps and allocate the per-env reward flags.
+// Aborts the process if the maps cannot be prepared, if the observation
+// buffer implied by env->size is smaller than a puzzle, or if calloc fails.
+void init (Boxoban* env) {
+    if (boxoban_configure_maps_from_env(env) != 0) {
+        fprintf(stderr, "Failed to configure Boxoban maps\n");
+        abort();
+    }
+    ensure_map_loaded();
+    // c_reset copies PUZZLE_OBS_BYTES into observations, so the 4-plane
+    // size*size buffer must be able to hold that many bytes
+    if (env->size <= 0 || PUZZLE_OBS_BYTES > (size_t)4 * env->size * env->size) {
+        fprintf(stderr, "Boxoban size %d is too small for the %zu-byte puzzle layout\n",
+            env->size, PUZZLE_OBS_BYTES);
+        abort();
+    }
+    env->intermediate_rewards = calloc(env->size*env->size, sizeof(unsigned char));
+    if (env->intermediate_rewards == NULL) {
+        fprintf(stderr, "Failed to allocate Boxoban intermediate rewards\n");
+        abort();
+    }
+    env->win = 0;
+}
+
+
+// Fold the finished episode into env->log. Called from c_step on a win or
+// timeout, before the env is reset. perf is 1 for a win, otherwise
+// on_target / n_boxes (0 when the map has no boxes).
+void add_log(Boxoban* env) {
+    float denom = (float)env->n_boxes;
+    float num = (float)env->on_target;
+    env->log.perf += (env->win == 1) ? 1.0f : (denom > 0.0f ? num/denom : 0.0f);
+    env->log.score += env->rewards[0];
+    env->log.episode_length += env->tick;
+    env->log.episode_return += env->episode_return;
+    env->log.on_targets += env->on_target;
+    env->log.n++;
+}
+
+
+// True when (x, y) is inside the grid and holds neither a wall nor a box
+bool clear(Boxoban* env, int x, int y) {
+    if (x < 0 || y < 0 || x >= env->size || y >= env->size) {
+        return false;
+    }
+    return (get_entity(env, WALLS, x, y) == 0) && (get_entity(env, BOXES, x, y) == 0);
+}
+
+// Required function. Loads a random puzzle from the mapped bin into the
+// observation planes and resets the per-episode state.
+void c_reset(Boxoban* env) {
+    const uint32_t i = get_random_puzzle_idx(env);
+    const uint8_t* puzzle = MAP_BASE + (size_t)i * PUZZLE_SIZE;
+    memcpy(env->observations, puzzle, PUZZLE_OBS_BYTES);
+
+    // Five metadata bytes follow the observation planes
+    const uint8_t* meta = puzzle + PUZZLE_OBS_BYTES;
+    env->agent_x = (int)meta[0];
+    env->agent_y = (int)meta[1];
+    env->n_boxes = (int)meta[2];
+    env->n_targets = (int)meta[3];
+    env->on_target = (int)meta[4];
+
+    // Every target cell starts with its one-shot intermediate reward available
+    memcpy(env->intermediate_rewards,
+        env->observations + TARGET * env->size * env->size,env->size * env->size);
+
+    env->tick = 0;
+    env->win = 0;
+    env->episode_return = 0.0f;
+
+}
+
+//Updates OBS for moved entity
+void move_entity(Boxoban* env,unsigned char entity,int x, int y, int dx, int dy) {
+    set_entity(env, entity, x, y, 0);
+    set_entity(env, entity, x + dx, y + dy, 1);
+}
+
+// Apply one action: move the agent, pushing a box if one is in the way and
+// the cell behind it is free. Updates observations, on_target and the
+// intermediate reward array in place.
+// Returns the intermediate reward flag of a newly covered target (else 0).
+int take_action(Boxoban* env, int action) {
+
+    int dx = 0;
+    int dy = 0;
+    int int_r = 0;
+
+    if (action == NOOP) {
+        return 0;
+    }
+    else if (action == DOWN) {
+        dy = 1;
+    }
+    else if (action == UP) {
+        dy = -1;
+    }
+    else if (action == LEFT) {
+        dx = -1;
+    }
+    else if (action == RIGHT) {
+        dx = 1;
+    }
+
+    //if move space is clear, move agent
+    if (clear(env, env->agent_x + dx, env->agent_y + dy)) {
+
+        move_entity(env, AGENT, env->agent_x, env->agent_y, dx, dy);
+        env->agent_y += dy;
+        env->agent_x += dx;
+        return 0;
+    }
+    //if it's not clear, but it's a box and the box is clear to move, move both
+    else if (clear(env, env->agent_x+ 2*dx, env->agent_y + 2*dy)
+        && get_entity(env, BOXES, env->agent_x + dx, env->agent_y + dy) == 1) {
+
+        //if box is on target currently, remove from on_target count
+        if (get_entity(env, TARGET, env->agent_x + dx, env->agent_y + dy) == 1) {
+
+            env->on_target -= 1;
+        }
+        //move both entities
+        move_entity(env, BOXES, env->agent_x + dx, env->agent_y + dy, dx, dy);
+        move_entity(env, AGENT, env->agent_x, env->agent_y, dx, dy);
+        env->agent_y += dy;
+        env->agent_x += dx;
+
+        //if box is now on target, add to on_target count
+        //if it's a new target, receive the intermediate reward and zero it out
+        if (get_entity(env, TARGET, env->agent_x + dx, env->agent_y + dy) == 1) {
+
+            env->on_target += 1;
+            int_r = get_intermediate_reward_status(env, env->agent_x + dx, env->agent_y + dy);
+            set_intermediate_reward(env, env->agent_x + dx, env->agent_y + dy, 0);
+        }
+        return int_r;
+    }
+    return 0;
+}
+
+// Required function. Advances the env by one action, computes the reward
+// and, on a win or timeout, logs the episode and resets in place.
+void c_step(Boxoban* env) {
+    env->tick += 1;
+    env->terminals[0] = 0;
+    env->rewards[0] = 0.0;
+
+    int action = env->actions[0];
+
+    float on_target = env->on_target;
+    int int_r = take_action(env, action); //int_r _new_ tgts covered, modifies observations in place
+    float on_target_after = env->on_target;
+
+    env->rewards[0] += (float)int_r * env->int_r_coeff; //coeff in .ini
+
+    if (on_target_after < on_target) { //target loss penalty
+        env->rewards[0] -= env->target_loss_pen_coeff; //coeff in .ini
+    }
+
+    //Terminals
+    int done = 0;
+    if (env->on_target == env->n_targets) {
+        env->terminals[0] = 1;
+        env->rewards[0] += 1.0;
+        env->win = 1;
+        done = 1;
+    }
+    else if (env->tick >= env->max_steps) {
+        env->terminals[0] = 1;
+        env->rewards[0] -= 1.0;
+        done = 1;
+    }
+
+    // Accumulate after the terminal bonus/penalty so the logged return
+    // covers every reward of the episode
+    env->episode_return += env->rewards[0];
+    if (done) {
+        add_log(env);
+        c_reset(env);
+    }
+}
+
+/*Rendering stuff*/
+
+// Allocate the render client and load its textures. The sprite directory is
+// the first existing entry of sprite_search_paths, falling back to
+// "sprites_pack/PNG" with a warning. Stores the client in env->client.
+Client* c_create(Boxoban* env) {
+    Client* client = calloc(1,sizeof(Client));
+    const char *sprite_search_paths[] = {
+        "sprites_pack/PNG",
+        "pufferlib/ocean/boxoban/sprites_pack/PNG",
+        "../pufferlib/ocean/boxoban/sprites_pack/PNG",
+    };
+    const char *sprite_base = NULL;
+    for (unsigned i = 0; i < sizeof(sprite_search_paths)/sizeof(sprite_search_paths[0]); i++) {
+        if (DirectoryExists(sprite_search_paths[i])) {
+            sprite_base = sprite_search_paths[i];
+            break;
+        }
+    }
+    if (sprite_base == NULL) {
+        TraceLog(LOG_WARNING, "Boxoban sprites not found next to executable, using default relative path");
+        sprite_base = "sprites_pack/PNG";
+    }
+
+    char resource_path[256] = {0};
+
+    snprintf(resource_path, sizeof(resource_path), "%s/Wall_black.png", sprite_base);
+    client->wall = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/Crate_black.png", sprite_base);
+    client->box = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/EndPoint_black.png", sprite_base);
+    client->target = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/GroundGravel_Concrete.png", sprite_base);
+    client->floor = LoadTexture(resource_path);
+    snprintf(resource_path, sizeof(resource_path), "%s/EndPoint_Blue.png", sprite_base);
+    client->box_on_target = LoadTexture(resource_path);
+    client->agent = LoadTexture("resources/shared/puffers_128.png");
+
+    env-> client = client;
+    return client;
+}
+
+#define TILE 32
+
+// Pick one texture for a cell by priority: wall, box on target, box, agent, target, floor
+Texture2D choose_sprite(Client *c, Boxoban *env, int x, int y) {
+    int a = get_entity(env, AGENT, x, y);
+    int w = get_entity(env, WALLS, x, y);
+    int b = get_entity(env, BOXES, x, y);
+    int t = get_entity(env, TARGET, x, y);
+
+    if (w) return c->wall;
+    if (b && t) return c->box_on_target;
+    if (b) return c->box;
+    if (a) return c->agent;
+    if (t) return c->target;
+
+    return c->floor;
+}
+
+// Draw one cell as layers: floor, then target, box, wall and agent if present
+void draw_tile(Boxoban *env, int x, int y) {
+    Client *c = env->client;
+    Rectangle dest = {x * TILE, y * TILE, TILE, TILE};
+
+    // Always lay down the base tile
+    DrawTexturePro(
+        c->floor,
+        (Rectangle){0, 0, (float)c->floor.width, (float)c->floor.height},
+        dest,
+        (Vector2){0, 0},
+        0.0f,
+        WHITE);
+
+    if (get_entity(env, TARGET, x, y)) {
+        DrawTexturePro(
+            c->target,
+            (Rectangle){0, 0, (float)c->target.width, (float)c->target.height},
+            dest,
+            (Vector2){0, 0},
+            0.0f,
+            WHITE);
+    }
+    if (get_entity(env, BOXES, x, y)) {
+        Texture2D tex = get_entity(env, TARGET, x, y) ? c->box_on_target : c->box;
+        DrawTexturePro(
+            tex,
+            (Rectangle){0, 0, (float)tex.width, (float)tex.height},
+            dest,
+            (Vector2){0, 0},
+            0.0f,
+            WHITE);
+    }
+    if (get_entity(env, WALLS, x, y)) {
+        DrawTexturePro(
+            c->wall,
+            (Rectangle){0, 0, (float)c->wall.width, (float)c->wall.height},
+            dest,
+            (Vector2){0, 0},
+            0.0f,
+            WHITE);
+    }
+    if (get_entity(env, AGENT, x, y)) {
+        // Only the left half of the agent texture is used as the sprite
+        Rectangle src = {0, 0, c->agent.width / 2.0f, (float)c->agent.height};
+        DrawTexturePro(c->agent, src, dest, (Vector2){0, 0}, 0.0f, WHITE);
+    }
+}
+
+
+// Required function. Should handle creating the client on first call
+void c_render(Boxoban* env) {
+    if (!IsWindowReady()) {
+        InitWindow(TILE*env->size, TILE*env->size, "PufferLib Boxoban");
+        SetTargetFPS(10);
+    }
+
+    // Standard across our envs so exiting is always the same
+    if (IsKeyDown(KEY_ESCAPE)) {
+        exit(0);
+    }
+
+    if (env->client == NULL) {
+        env->client = c_create(env);
+    }
+
+    BeginDrawing();
+    ClearBackground((Color){6, 24, 24, 255});
+
+    for (int y = 0; y < env->size; y++) {
+        for (int x = 0; x < env->size; x++) {
+            draw_tile(env, x, y);
+        }
+    }
+
+
+    EndDrawing();
+}
+
+// Required function.
Should clean up anything you allocated
+// Do not free env->observations, actions, rewards, terminals
+void c_close(Boxoban* env) {
+    // free(NULL) is a no-op, so the pointer needs no guard
+    free(env->intermediate_rewards);
+    env->intermediate_rewards = NULL;
+    if (IsWindowReady()) {
+        if (env->client) {
+            UnloadTexture(env->client->wall);
+            UnloadTexture(env->client->box);
+            UnloadTexture(env->client->target);
+            UnloadTexture(env->client->floor);
+            UnloadTexture(env->client->agent);
+            UnloadTexture(env->client->box_on_target); // loaded in c_create, previously never released
+            free(env->client);
+            env->client = NULL;
+        }
+        CloseWindow();
+    }
+}
diff --git a/pufferlib/ocean/boxoban/boxoban.py b/pufferlib/ocean/boxoban/boxoban.py
new file mode 100644
index 0000000000..1c1ff460c0
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban.py
@@ -0,0 +1,81 @@
+import gymnasium
+import numpy as np
+
+import pufferlib
+from pufferlib.ocean.boxoban import binding
+
+class Boxoban(pufferlib.PufferEnv):
+    """Vectorized Boxoban environment backed by the C binding.
+
+    Observations are flat uint8 vectors of length size*size*4 (agent, wall,
+    box and target planes) and there are 5 discrete actions.
+    """
+    def __init__(self, num_envs=1, render_mode=None, log_interval=128, size=10, buf=None, seed=0, difficulty=0, max_steps = 500,int_r_coeff = 0.1, target_loss_pen_coeff = 0.5):
+        self.shape = size*size*4 #agents walls boxes targets OHE
+        self.difficulty = difficulty
+        self._difficulty_stat_key = f"difficulty ({self.difficulty})"
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=1,
+            shape=(self.shape,), dtype=np.uint8)
+        self.single_action_space = gymnasium.spaces.Discrete(5)
+        self.render_mode = render_mode
+        self.num_agents = num_envs
+        self.log_interval = log_interval
+        self.max_steps = max_steps
+        self.int_r_coeff = int_r_coeff
+        self.target_loss_pen_coeff = target_loss_pen_coeff
+        self.tick = 0  # so step() works even before the first reset()
+
+        super().__init__(buf)
+        self.c_envs = binding.vec_init(self.observations, self.actions, self.rewards,
+            self.terminals, self.truncations, num_envs, seed, size=size, max_steps = self.max_steps, int_r_coeff = self.int_r_coeff, target_loss_pen_coeff = self.target_loss_pen_coeff, difficulty=self.difficulty)
+
+    def reset(self, seed=0):
+        """Reset every env and return (observations, empty info list)."""
+        binding.vec_reset(self.c_envs, seed)
+        self.tick = 0
+        return self.observations, []
+
+    def step(self, actions):
+        """Step every env; every log_interval steps the C-side log is added to info."""
+        self.tick += 1
+
+        self.actions[:] = actions
+        binding.vec_step(self.c_envs)
+
+        info = []
+        if self.tick % self.log_interval == 0:
+            log_dict = binding.vec_log(self.c_envs)
+            log_dict[self._difficulty_stat_key] = 1.0
+            info.append(log_dict)
+
+        return (self.observations, self.rewards,
+            self.terminals, self.truncations, info)
+
+    def render(self):
+        """Render env 0 through the C binding."""
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        """Close the C-side envs."""
+        binding.vec_close(self.c_envs)
+
+if __name__ == '__main__':
+    N = 1
+
+    env = Boxoban(num_envs=N)
+    env.reset()
+    env.render()
+    steps = 0
+
+    CACHE = 1024
+    actions = np.random.randint(0, 5, (CACHE, N))
+
+    i = 0
+    import time
+    start = time.time()
+    while time.time() - start < 10:
+        env.step(actions[i % CACHE])
+        steps += N
+        i += 1
+
+    print('Boxoban SPS:', int(steps / (time.time() - start)))
diff --git a/pufferlib/ocean/boxoban/boxoban_maps.h b/pufferlib/ocean/boxoban/boxoban_maps.h
new file mode 100644
index 0000000000..437043d454
--- /dev/null
+++ b/pufferlib/ocean/boxoban/boxoban_maps.h
@@ -0,0 +1,504 @@
+#ifndef PUFFERLIB_OCEAN_BOXOBAN_MAPS_H
+#define PUFFERLIB_OCEAN_BOXOBAN_MAPS_H
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "generate_maps.h"
+#include "parse_maps.h"
+
+/*
+Maps are stored in binary files keyed by difficulty.
+If the bin does not exist it is created on the fly, then mmapped and shared by envs.
+*/
+
+extern uint8_t *MAP_BASE;
+extern size_t MAP_FILESIZE;
+extern size_t PUZZLE_COUNT;
+extern size_t PUZZLE_SIZE;
+extern size_t PUZZLE_OBS_BYTES;
+
+int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap);
+int boxoban_set_map_path(const char *path);
+int boxoban_difficulty_id_from_name(const char* difficulty_name);
+const char* boxoban_difficulty_name_from_id(int difficulty_id);
+void ensure_map_loaded(void);
+
+#ifdef BOXOBAN_MAPS_IMPLEMENTATION
+
+uint8_t *MAP_BASE = NULL;
+size_t MAP_FILESIZE = 0;
+size_t PUZZLE_COUNT = 0;
+size_t PUZZLE_SIZE = BOXOBAN_PUZZLE_BYTES;
+size_t PUZZLE_OBS_BYTES = BOXOBAN_PUZZLE_OBS_BYTES;
+static char* BOXOBAN_MAP_PATH = NULL;
+static const char* BOXOBAN_LEVEL_ROOT = "pufferlib/ocean/boxoban/boxoban-levels";
+
+// Growable array of heap-allocated path strings
+typedef struct {
+    char** items;
+    size_t count;
+    size_t cap;
+} BoxobanPathList;
+
+// qsort comparator ordering char* elements with strcmp
+static int boxoban_cmp_strings(const void* a, const void* b) {
+    const char* const* sa = (const char* const*)a;
+    const char* const* sb = (const char* const*)b;
+    return strcmp(*sa, *sb);
+}
+
+// Free every stored string and the array, leaving the list empty
+static void boxoban_path_list_free(BoxobanPathList* list) {
+    for (size_t i = 0; i < list->count; i++) {
+        free(list->items[i]);
+    }
+    free(list->items);
+    list->items = NULL;
+    list->count = 0;
+    list->cap = 0;
+}
+
+// Append a heap copy of path, doubling the capacity (from 64) when full.
+// Returns 0 on success and -1 if an allocation fails.
+static int boxoban_path_list_append(BoxobanPathList* list, const char* path) {
+    if (list->count == list->cap) {
+        size_t next_cap = list->cap == 0 ? 64 : list->cap * 2;
+        char** next = (char**)realloc(list->items, next_cap * sizeof(char*));
+        if (next == NULL) {
+            return -1;
+        }
+        list->items = next;
+        list->cap = next_cap;
+    }
+    char* copied = (char*)malloc(strlen(path) + 1);
+    if (copied == NULL) {
+        return -1;
+    }
+    strcpy(copied, path);
+    list->items[list->count++] = copied;
+    return 0;
+}
+
+// Non-zero when name ends with ".txt"
+static int boxoban_has_txt_suffix(const char* name) {
+    size_t len = strlen(name);
+    return len >= 4 && strcmp(name + len - 4, ".txt") == 0;
+}
+
+// Map a difficulty name to its id (0..4); -1 for NULL or unknown names
+int boxoban_difficulty_id_from_name(const char* difficulty_name) {
+    if (difficulty_name == NULL) {
+        return -1;
+    }
+
+    if (strcmp(difficulty_name, "basic") == 0) {
+        return 0;
+    }
+    if (strcmp(difficulty_name, "easy") == 0) {
+        return 1;
+    }
+    if (strcmp(difficulty_name, "medium") == 0) {
+        return 2;
+    }
+    if (strcmp(difficulty_name, "hard") == 0) {
+        return 3;
+    }
+    if (strcmp(difficulty_name, "unfiltered") == 0) {
+        return 4;
+    }
+
+    return -1;
+}
+
+// Map a difficulty id (0..4) to its name; NULL for any other id
+const char* boxoban_difficulty_name_from_id(int difficulty_id) {
+    switch (difficulty_id) {
+    case 0:
+        return "basic";
+    case 1:
+        return "easy";
+    case 2:
+        return "medium";
+    case 3:
+        return "hard";
+    case 4:
+        return "unfiltered";
+    default:
+        return NULL;
+    }
+}
+
+// 1 if dir_path can be opened and holds at least one *.txt entry, else 0
+static int boxoban_dir_has_txt(const char* dir_path) {
+    DIR* dir = opendir(dir_path);
+    if (dir == NULL) {
+        return 0;
+    }
+    struct dirent* ent;
+    while ((ent = readdir(dir)) != NULL) {
+        if (boxoban_has_txt_suffix(ent->d_name)) {
+            closedir(dir);
+            return 1;
+        }
+    }
+    closedir(dir);
+    return 0;
+}
+
+// Append the full paths of all *.txt entries of dir_path to out_paths in
+// strcmp order of their file names. Returns 0 on success, -1 on failure.
+static int boxoban_collect_sorted_txt_paths_in_dir(const char* dir_path, BoxobanPathList* out_paths) {
+    DIR* dir = opendir(dir_path);
+    if (dir == NULL) {
+        fprintf(stderr, "Missing level directory %s\n", dir_path);
+        return -1;
+    }
+
+    BoxobanPathList names = {0};
+    struct dirent* ent;
+    while ((ent = readdir(dir)) != NULL) {
+        if (!boxoban_has_txt_suffix(ent->d_name)) {
+            continue;
+        }
+        if (boxoban_path_list_append(&names, ent->d_name) != 0) {
+            boxoban_path_list_free(&names);
+            closedir(dir);
+            return -1;
+        }
+    }
+    closedir(dir);
+
+    qsort(names.items, names.count, sizeof(char*), boxoban_cmp_strings);
+    for (size_t i = 0; i < names.count; i++) {
+        char full_path[1400];
+        snprintf(full_path, sizeof(full_path), "%s/%s", dir_path, names.items[i]);
+        if (boxoban_path_list_append(out_paths, full_path) != 0) {
+            boxoban_path_list_free(&names);
+            return -1;
+        }
+    }
+    boxoban_path_list_free(&names);
+    return 0;
+}
+
+// Collect the sorted level files under BOXOBAN_LEVEL_ROOT/rel_path.
+// Returns -1 if that directory does not exist.
+static int boxoban_collect_maps_from_dir(const char* rel_path, BoxobanPathList* out_paths) {
+    char level_dir[1400];
+    struct stat st;
+
+    snprintf(level_dir, sizeof(level_dir), "%s/%s", BOXOBAN_LEVEL_ROOT, rel_path);
+    if (stat(level_dir, &st) != 0 || !S_ISDIR(st.st_mode)) {
+        fprintf(stderr, "Missing level directory %s\n", level_dir);
+        return -1;
+    }
+
+    return boxoban_collect_sorted_txt_paths_in_dir(level_dir, out_paths);
+}
+
+// Collect the level files of a difficulty. All sets read their "train"
+// subdirectory except "hard", which is read from its top level.
+static int boxoban_collect_maps(const char* difficulty, BoxobanPathList* out_paths) {
+    if (strcmp(difficulty, "basic") == 0) {
+        return boxoban_collect_maps_from_dir("basic/train", out_paths);
+    }
+    if (strcmp(difficulty, "easy") == 0) {
+        return boxoban_collect_maps_from_dir("easy/train", out_paths);
+    }
+    if (strcmp(difficulty, "medium") == 0) {
+        return boxoban_collect_maps_from_dir("medium/train", out_paths);
+    }
+    if (strcmp(difficulty, "hard") == 0) {
+        return boxoban_collect_maps_from_dir("hard", out_paths);
+    }
+    if (strcmp(difficulty, "unfiltered") == 0) {
+        return boxoban_collect_maps_from_dir("unfiltered/train", out_paths);
+    }
+
+    fprintf(stderr, "Invalid difficulty '%s'\n", difficulty);
+    return -1;
+}
+
+// Best-effort removal of the temporary download directory
+static void boxoban_remove_tmp_dir(const char* tmp_dir) {
+    char cmd[4096];
+    snprintf(cmd, sizeof(cmd), "rm -rf '%s'", tmp_dir);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Failed to remove temporary directory %s\n", tmp_dir);
+    }
+}
+
+// Download zip_url into tmp_dir, unzip it there and copy the extracted
+// '<difficulty>' directory into BOXOBAN_LEVEL_ROOT/<difficulty>. Shells out
+// to curl, unzip, find and cp. Returns 0 on success and -1 on failure.
+static int boxoban_fetch_and_install_maps(const char* difficulty, const char* tmp_dir, const char* zip_url) {
+    char zip_path[1400];
+    snprintf(zip_path, sizeof(zip_path), "%s/%s.zip", tmp_dir, difficulty);
+
+    char cmd[4096];
+    snprintf(cmd, sizeof(cmd), "curl -L --fail -o '%s' '%s' > /dev/null 2>&1", zip_path, zip_url);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Failed to download Boxoban maps with curl\n");
+        return -1;
+    }
+
+    snprintf(cmd, sizeof(cmd), "unzip -q '%s' -d '%s'", zip_path, tmp_dir);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Failed to unzip Boxoban maps archive\n");
+        return -1;
+    }
+
+    char extracted_root[1400] = {0};
+    char find_cmd[4096];
+    snprintf(find_cmd, sizeof(find_cmd), "find '%s' -type d -name '%s' | head -n 1", tmp_dir, difficulty);
+    FILE* find_pipe = popen(find_cmd, "r");
+    if (find_pipe == NULL) {
+        return -1;
+    }
+    if (fgets(extracted_root, sizeof(extracted_root), find_pipe) == NULL) {
+        pclose(find_pipe);
+        fprintf(stderr, "Downloaded zip missing '%s' directory\n", difficulty);
+        return -1;
+    }
+    pclose(find_pipe);
+    extracted_root[strcspn(extracted_root, "\r\n")] = '\0';
+
+    char dest_root[1400];
+    snprintf(dest_root, sizeof(dest_root), "%s/%s", BOXOBAN_LEVEL_ROOT, difficulty);
+    if (boxoban_mkdir_p(dest_root) != 0) {
+        return -1;
+    }
+
+    snprintf(cmd, sizeof(cmd), "cp -R '%s/.' '%s/'", extracted_root, dest_root);
+    if (system(cmd) != 0) {
+        fprintf(stderr, "Failed to copy downloaded maps into %s\n", dest_root);
+        return -1;
+    }
+    return 0;
+}
+
+// Fetch the text levels of a difficulty through a fresh temporary
+// directory, which is removed again whether or not the download succeeds.
+static int boxoban_download_text_maps(const char* difficulty) {
+    char zip_url[512];
+    snprintf(zip_url, sizeof(zip_url),
+        "https://raw.githubusercontent.com/TBBristol/pufferlib_boxoban_levels/main/%s.zip",
+        difficulty);
+    fprintf(stdout, "[Boxoban] Downloading %s maps from %s\n", difficulty, zip_url);
+
+    char tmp_template[] = "/tmp/boxoban_maps_XXXXXX";
+    char* tmp_dir = mkdtemp(tmp_template);
+    if (tmp_dir == NULL) {
+        return -1;
+    }
+
+    int status = boxoban_fetch_and_install_maps(difficulty, tmp_dir, zip_url);
+    boxoban_remove_tmp_dir(tmp_dir);
+    return status;
+}
+
+// Make sure the text levels for a difficulty exist on disk: basic and easy
+// are generated locally, the other sets are downloaded when their level
+// directory holds no .txt file yet. Returns 0 on success.
+static int boxoban_ensure_text_maps(const char* difficulty) {
+    if (strcmp(difficulty, "basic") == 0) {
+        char output_dir[1400];
+        snprintf(output_dir, sizeof(output_dir), "%s/basic/train", BOXOBAN_LEVEL_ROOT);
+        if (boxoban_dir_has_txt(output_dir)) {
+            return 0;
+        }
+        fprintf(stdout, "[Boxoban] Generating basic maps at %s\n", output_dir);
+        return boxoban_generate_basic_maps(output_dir, 0);
+    }
+    if (strcmp(difficulty, "easy") == 0) {
+        char output_dir[1400];
+        snprintf(output_dir, sizeof(output_dir), "%s/easy/train", BOXOBAN_LEVEL_ROOT);
+        if (boxoban_dir_has_txt(output_dir)) {
+            return 0;
+        }
+        fprintf(stdout, "[Boxoban] Generating easy maps at %s\n", output_dir);
+        return boxoban_generate_easy_maps(output_dir, 0);
+    }
+    if (strcmp(difficulty, "medium") == 0) {
+        char level_dir[1400];
+        snprintf(level_dir, sizeof(level_dir), "%s/medium/train", BOXOBAN_LEVEL_ROOT);
+        if (boxoban_dir_has_txt(level_dir)) {
+            return 0;
+        }
+        return boxoban_download_text_maps(difficulty);
+    }
+    if (strcmp(difficulty, "hard") == 0) {
+        char level_dir[1400];
+        snprintf(level_dir, sizeof(level_dir), "%s/hard", BOXOBAN_LEVEL_ROOT);
+        if (boxoban_dir_has_txt(level_dir)) {
+            return 0;
+        }
+        return boxoban_download_text_maps(difficulty);
+    }
+    if (strcmp(difficulty, "unfiltered") == 0) {
+        char level_dir[1400];
+        snprintf(level_dir, sizeof(level_dir), "%s/unfiltered/train", BOXOBAN_LEVEL_ROOT);
+        if (boxoban_dir_has_txt(level_dir)) {
+            return 0;
+        }
+        return boxoban_download_text_maps(difficulty);
+    }
+
+    return boxoban_download_text_maps(difficulty);
+}
+
+// Write the bin path for a difficulty into out_path.
+// Returns -1 if it does not fit in out_cap bytes.
+static int boxoban_bin_path(const char* difficulty, char* out_path, size_t out_cap) {
+    int written = snprintf(out_path, out_cap, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", difficulty);
+    if (written <= 0 || (size_t)written >= out_cap) {
+        return -1;
+    }
+    return 0;
+}
+
+// Ensure the bin file for a named difficulty exists (building it from the
+// text levels when missing), write its path to out_path and make it the
+// active map path. Returns 0 on success, -1 on invalid input or failure.
+int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
+    if (difficulty == NULL || out_path == NULL) {
+        return -1;
+    }
+    if (boxoban_difficulty_id_from_name(difficulty) < 0) {
+        return -1;
+    }
+    if (boxoban_bin_path(difficulty, out_path, out_cap) != 0) {
+        return -1;
+    }
+
+    if (access(out_path, F_OK) != 0) {
+        if (boxoban_ensure_text_maps(difficulty) != 0) {
+            return -1;
+        }
+
+        BoxobanPathList maps = {0};
+        size_t puzzle_count = 0;
+        if (boxoban_collect_maps(difficulty, &maps) != 0) {
+            boxoban_path_list_free(&maps);
+            return -1;
+        }
+
+        if (boxoban_write_bin_from_files((const char* const*)maps.items, maps.count, out_path, 0, &puzzle_count) != 0) {
+            boxoban_path_list_free(&maps);
+            return -1;
+        }
+        boxoban_path_list_free(&maps);
+        fprintf(stdout, "[Boxoban] Generated %zu puzzles for '%s' at %s\n", puzzle_count, difficulty, out_path);
+    }
+
+    if (boxoban_set_map_path(out_path) != 0) {
+        return -1;
+    }
+    return 0;
+}
+
+// Unmap the current bin, if any, and clear the cached size and count
+static void reset_map_cache(void) {
+    if (MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0) {
+        munmap(MAP_BASE, MAP_FILESIZE);
+    }
+    MAP_BASE = NULL;
+    MAP_FILESIZE = 0;
+    PUZZLE_COUNT = 0;
+}
+
+// Select the bin file to load. A path equal to the current one is a no-op;
+// otherwise the cached mapping is dropped so the next ensure_map_loaded()
+// maps the new file. Returns -1 for NULL or on allocation failure.
+int boxoban_set_map_path(const char *path) {
+    if (path == NULL) {
+        return -1;
+    }
+    if (BOXOBAN_MAP_PATH != NULL && strcmp(BOXOBAN_MAP_PATH, path) == 0) {
+        return 0;
+    }
+
+    char* copied = malloc(strlen(path) + 1);
+    if (copied == NULL) {
+        return -1;
+    }
+    strcpy(copied, path);
+
+    reset_map_cache();
+    free(BOXOBAN_MAP_PATH);
+    BOXOBAN_MAP_PATH = copied;
+    return 0;
+}
+
+// Path from the BOXOBAN_MAP_BIN environment variable, or NULL if unset
+static const char* get_default_map_path(void) {
+    const char* env_path = getenv("BOXOBAN_MAP_BIN");
+    if (env_path != NULL) {
+        return env_path;
+    }
+    return NULL;
+}
+
+// Map the current bin file read-only into MAP_BASE if that has not happened
+// yet. With no path configured, uses $BOXOBAN_MAP_BIN or else prepares the
+// "basic" maps. Aborts the process on any failure, including an empty file.
+void ensure_map_loaded(void) {
+    if (MAP_BASE != NULL) {
+        return;
+    }
+
+    if (BOXOBAN_MAP_PATH == NULL) {
+        const char* default_path = get_default_map_path();
+        if (default_path != NULL) {
+            if (boxoban_set_map_path(default_path) != 0) {
+                fprintf(stderr, "Failed to set default Boxoban map path\n");
+                abort();
+            }
+        } else {
+            char prepared_path[512];
+            if (boxoban_prepare_maps_for_difficulty("basic", prepared_path, sizeof(prepared_path)) != 0) {
+                fprintf(stderr, "Failed to prepare default Boxoban maps\n");
+                abort();
+            }
+        }
+    }
+
+    int fd = open(BOXOBAN_MAP_PATH, O_RDONLY);
+    if (fd < 0) {
+        perror("open");
+        abort();
+    }
+    struct stat st;
+    if (fstat(fd, &st) != 0) {
+        perror("fstat");
+        abort();
+    }
+
+    MAP_FILESIZE = st.st_size;
+    if (MAP_FILESIZE == 0 || MAP_FILESIZE % PUZZLE_SIZE != 0) {
+        fprintf(stderr, "Invalid Boxoban map file size %zu (expected non-zero multiple of %zu)\n",
+            MAP_FILESIZE, PUZZLE_SIZE);
+        abort();
+    }
+    PUZZLE_COUNT = MAP_FILESIZE / PUZZLE_SIZE;
+
+    MAP_BASE = mmap(NULL, MAP_FILESIZE, PROT_READ, MAP_PRIVATE, fd, 0);
+    close(fd);
+
+    if (MAP_BASE == MAP_FAILED) {
+        perror("mmap");
+        abort();
+    }
+}
+
+#endif
+
+#endif
diff --git a/pufferlib/ocean/boxoban/generate_maps.h b/pufferlib/ocean/boxoban/generate_maps.h
new file mode 100644
index 0000000000..cdd39a10b7
--- /dev/null
+++ b/pufferlib/ocean/boxoban/generate_maps.h
@@ -0,0 +1,368 @@
+#ifndef PUFFERLIB_OCEAN_BOXOBAN_GENERATE_MAPS_H
+#define PUFFERLIB_OCEAN_BOXOBAN_GENERATE_MAPS_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define BOXOBAN_GEN_AGENT '@'
+#define BOXOBAN_GEN_WALL '#'
+#define BOXOBAN_GEN_BOX '$'
+#define BOXOBAN_GEN_TARGET '.'
+#define BOXOBAN_GEN_FLOOR ' ' + +typedef struct { + int r; + int c; +} BoxobanCell; + +typedef struct { + uint64_t state; +} BoxobanRandom; + +static int boxoban_mkdir_p(const char* dir_path) { + char tmp[1024]; + size_t len = strlen(dir_path); + if (len >= sizeof(tmp)) { + return -1; + } + + memcpy(tmp, dir_path, len + 1); + for (size_t i = 1; i < len; i++) { + if (tmp[i] == '/') { + tmp[i] = '\0'; + if (mkdir(tmp, 0777) != 0 && errno != EEXIST) { + return -1; + } + tmp[i] = '/'; + } + } + if (mkdir(tmp, 0777) != 0 && errno != EEXIST) { + return -1; + } + return 0; +} + +static void boxoban_seed(BoxobanRandom* rng, uint64_t seed) { + rng->state = seed ? seed : 0x9e3779b97f4a7c15ULL; +} + +static uint64_t boxoban_next_u64(BoxobanRandom* rng) { + uint64_t x = rng->state; + x ^= x >> 12; + x ^= x << 25; + x ^= x >> 27; + rng->state = x; + return x * 2685821657736338717ULL; +} + +static uint32_t boxoban_randbelow(BoxobanRandom* rng, uint32_t n) { + if (n == 0) { + return 0; + } + + uint64_t threshold = (uint64_t)(-(int64_t)n) % (uint64_t)n; + for (;;) { + uint64_t r = boxoban_next_u64(rng); + if (r >= threshold) { + return (uint32_t)(r % n); + } + } +} + +static int boxoban_randint(BoxobanRandom* rng, int a, int b) { + return a + (int)boxoban_randbelow(rng, (uint32_t)(b - a + 1)); +} + +static int boxoban_choice_index(BoxobanRandom* rng, int n) { + return (int)boxoban_randbelow(rng, (uint32_t)n); +} + +static int boxoban_sample_indices(BoxobanRandom* rng, int n, int k, int* out_indices) { + int* pool = (int*)malloc((size_t)n * sizeof(int)); + if (pool == NULL) { + return -1; + } + + for (int i = 0; i < n; i++) { + pool[i] = i; + } + + for (int i = 0; i < k; i++) { + int j = i + (int)boxoban_randbelow(rng, (uint32_t)(n - i)); + int tmp = pool[i]; + pool[i] = pool[j]; + pool[j] = tmp; + out_indices[i] = pool[i]; + } + + free(pool); + return 0; +} + +static inline int boxoban_grid_idx(int size, int r, int c) { + return r * size + c; +} + +static int 
boxoban_is_inside(int size, int x, int y) { + return x >= 0 && x < size && y >= 0 && y < size; +} + +static int boxoban_is_pushable(const char* grid, int size, int x, int y) { + static const int dirs[4][2] = {{1,0}, {-1,0}, {0,1}, {0,-1}}; + for (int d = 0; d < 4; d++) { + int dx = dirs[d][0]; + int dy = dirs[d][1]; + int px = x - dx; + int py = y - dy; + int tx = x + dx; + int ty = y + dy; + if (!boxoban_is_inside(size, px, py) || !boxoban_is_inside(size, tx, ty)) { + continue; + } + char pre = grid[boxoban_grid_idx(size, py, px)]; + char post = grid[boxoban_grid_idx(size, ty, tx)]; + if ((pre == BOXOBAN_GEN_FLOOR || pre == BOXOBAN_GEN_TARGET) && + (post == BOXOBAN_GEN_FLOOR || post == BOXOBAN_GEN_TARGET)) { + return 1; + } + } + return 0; +} + +static void boxoban_build_border_grid(char* grid, int size) { + for (int r = 0; r < size; r++) { + for (int c = 0; c < size; c++) { + grid[boxoban_grid_idx(size, r, c)] = BOXOBAN_GEN_FLOOR; + } + } + for (int i = 0; i < size; i++) { + grid[boxoban_grid_idx(size, 0, i)] = BOXOBAN_GEN_WALL; + grid[boxoban_grid_idx(size, size - 1, i)] = BOXOBAN_GEN_WALL; + grid[boxoban_grid_idx(size, i, 0)] = BOXOBAN_GEN_WALL; + grid[boxoban_grid_idx(size, i, size - 1)] = BOXOBAN_GEN_WALL; + } +} + +static int boxoban_build_cells(int size, int margin, BoxobanCell* out_cells) { + int count = 0; + int start = 1 + margin; + int end = size - 1 - margin; + for (int r = start; r < end; r++) { + for (int c = start; c < end; c++) { + out_cells[count].r = r; + out_cells[count].c = c; + count++; + } + } + return count; +} + +static int boxoban_make_puzzle( + int size, + BoxobanRandom* rng, + int num_boxes, + int max_attempts, + const BoxobanCell* agent_choices, + int agent_count, + const BoxobanCell* confined, + int confined_count, + int interior_count, + char* grid +) { + if (num_boxes < 1) { + fprintf(stderr, "num_boxes must be at least 1\n"); + return -1; + } + + int needed = num_boxes * 2 + 1; + if (needed > interior_count) { + fprintf(stderr, 
"Grid interior only has %d cells, cannot place %d objects\n", interior_count, needed); + return -1; + } + + BoxobanCell* box_candidates = (BoxobanCell*)malloc((size_t)confined_count * sizeof(BoxobanCell)); + BoxobanCell* box_positions = (BoxobanCell*)malloc((size_t)num_boxes * sizeof(BoxobanCell)); + BoxobanCell* agent_candidates = (BoxobanCell*)malloc((size_t)agent_count * sizeof(BoxobanCell)); + int* sampled_idx = (int*)malloc((size_t)num_boxes * sizeof(int)); + uint8_t* occupied = (uint8_t*)calloc((size_t)size * (size_t)size, sizeof(uint8_t)); + if (box_candidates == NULL || box_positions == NULL || agent_candidates == NULL || sampled_idx == NULL || occupied == NULL) { + free(box_candidates); + free(box_positions); + free(agent_candidates); + free(sampled_idx); + free(occupied); + return -1; + } + + for (int attempt = 0; attempt < max_attempts; attempt++) { + boxoban_build_border_grid(grid, size); + memset(occupied, 0, (size_t)size * (size_t)size); + + if (boxoban_sample_indices(rng, confined_count, num_boxes, sampled_idx) != 0) { + free(box_candidates); + free(box_positions); + free(agent_candidates); + free(sampled_idx); + free(occupied); + return -1; + } + + for (int i = 0; i < num_boxes; i++) { + BoxobanCell cell = confined[sampled_idx[i]]; + grid[boxoban_grid_idx(size, cell.r, cell.c)] = BOXOBAN_GEN_TARGET; + occupied[boxoban_grid_idx(size, cell.r, cell.c)] = 1; + } + + int box_candidate_count = 0; + for (int i = 0; i < confined_count; i++) { + BoxobanCell cell = confined[i]; + if (!occupied[boxoban_grid_idx(size, cell.r, cell.c)]) { + box_candidates[box_candidate_count++] = cell; + } + } + if (box_candidate_count < num_boxes) { + continue; + } + + if (boxoban_sample_indices(rng, box_candidate_count, num_boxes, sampled_idx) != 0) { + free(box_candidates); + free(box_positions); + free(agent_candidates); + free(sampled_idx); + free(occupied); + return -1; + } + for (int i = 0; i < num_boxes; i++) { + BoxobanCell cell = box_candidates[sampled_idx[i]]; + 
box_positions[i] = cell; + grid[boxoban_grid_idx(size, cell.r, cell.c)] = BOXOBAN_GEN_BOX; + occupied[boxoban_grid_idx(size, cell.r, cell.c)] = 1; + } + + int agent_candidate_count = 0; + for (int i = 0; i < agent_count; i++) { + BoxobanCell cell = agent_choices[i]; + if (!occupied[boxoban_grid_idx(size, cell.r, cell.c)]) { + agent_candidates[agent_candidate_count++] = cell; + } + } + if (agent_candidate_count == 0) { + continue; + } + + BoxobanCell agent_cell = agent_candidates[boxoban_choice_index(rng, agent_candidate_count)]; + grid[boxoban_grid_idx(size, agent_cell.r, agent_cell.c)] = BOXOBAN_GEN_AGENT; + + int all_pushable = 1; + for (int i = 0; i < num_boxes; i++) { + BoxobanCell cell = box_positions[i]; + if (!boxoban_is_pushable(grid, size, cell.c, cell.r)) { + all_pushable = 0; + break; + } + } + + if (all_pushable) { + free(box_candidates); + free(box_positions); + free(agent_candidates); + free(sampled_idx); + free(occupied); + return 0; + } + } + + free(box_candidates); + free(box_positions); + free(agent_candidates); + free(sampled_idx); + free(occupied); + fprintf(stderr, "Failed to sample a solvable puzzle after many attempts\n"); + return -1; +} + +static int boxoban_generate_maps( + const char* output_dir, + int num_files, + int puzzles_per_file, + int size, + int num_boxes, + int min_boxes, + int max_boxes, + uint64_t seed +) { + if (boxoban_mkdir_p(output_dir) != 0) { + return -1; + } + + BoxobanRandom rng; + boxoban_seed(&rng, seed); + + int max_cells = (size - 2) * (size - 2); + BoxobanCell* agent_choices = (BoxobanCell*)malloc((size_t)max_cells * sizeof(BoxobanCell)); + BoxobanCell* confined = (BoxobanCell*)malloc((size_t)max_cells * sizeof(BoxobanCell)); + char* grid = (char*)malloc((size_t)size * (size_t)size); + if (agent_choices == NULL || confined == NULL || grid == NULL) { + free(agent_choices); + free(confined); + free(grid); + return -1; + } + + int interior_count = (size - 2) * (size - 2); + int agent_count = boxoban_build_cells(size, 
0, agent_choices); + int confined_count = boxoban_build_cells(size, 1, confined); + + for (int file_idx = 0; file_idx < num_files; file_idx++) { + char out_path[1200]; + snprintf(out_path, sizeof(out_path), "%s/%03d.txt", output_dir, file_idx); + FILE* out = fopen(out_path, "w"); + if (out == NULL) { + free(agent_choices); + free(confined); + free(grid); + return -1; + } + + for (int puzzle_idx = 0; puzzle_idx < puzzles_per_file; puzzle_idx++) { + int box_count = num_boxes >= 1 ? num_boxes : boxoban_randint(&rng, min_boxes, max_boxes); + if (boxoban_make_puzzle( + size, &rng, box_count, 200, agent_choices, agent_count, confined, confined_count, interior_count, grid) != 0) { + fclose(out); + free(agent_choices); + free(confined); + free(grid); + return -1; + } + + fprintf(out, "; %d\n", puzzle_idx); + for (int r = 0; r < size; r++) { + fwrite(&grid[boxoban_grid_idx(size, r, 0)], 1, (size_t)size, out); + fputc('\n', out); + } + fputc('\n', out); + } + + fclose(out); + } + + free(agent_choices); + free(confined); + free(grid); + return 0; +} + +static int boxoban_generate_easy_maps(const char* output_dir, uint64_t seed) { + return boxoban_generate_maps(output_dir, 300, 1000, 10, -1, 1, 4, seed); +} + +static int boxoban_generate_basic_maps(const char* output_dir, uint64_t seed) { + return boxoban_generate_maps(output_dir, 300, 1000, 10, 1, 1, 4, seed); +} + +#endif diff --git a/pufferlib/ocean/boxoban/parse_maps.h b/pufferlib/ocean/boxoban/parse_maps.h new file mode 100644 index 0000000000..79e79ed8e1 --- /dev/null +++ b/pufferlib/ocean/boxoban/parse_maps.h @@ -0,0 +1,252 @@ +#ifndef PUFFERLIB_OCEAN_BOXOBAN_PARSE_MAPS_H +#define PUFFERLIB_OCEAN_BOXOBAN_PARSE_MAPS_H + +#include +#include +#include +#include +#include +#include + +#define BOXOBAN_AGENT_CHAR '@' +#define BOXOBAN_WALL_CHAR '#' +#define BOXOBAN_BOX_CHAR '$' +#define BOXOBAN_TARGET_CHAR '.' 
+#define BOXOBAN_BOX_ON_TARGET_CHAR '*' +#define BOXOBAN_AGENT_ON_TARGET_CHAR '+' + +#define BOXOBAN_EXPECTED_ROWS 10 +#define BOXOBAN_EXPECTED_COLS 10 +#define BOXOBAN_PUZZLE_OBS_BYTES (4 * BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS) +#define BOXOBAN_PUZZLE_META_BYTES 5 +#define BOXOBAN_PUZZLE_BYTES (BOXOBAN_PUZZLE_OBS_BYTES + BOXOBAN_PUZZLE_META_BYTES) + +typedef struct { + char rows[BOXOBAN_EXPECTED_ROWS][BOXOBAN_EXPECTED_COLS]; + int row_lengths[BOXOBAN_EXPECTED_ROWS]; + int row_count; +} BoxobanPuzzleDraft; + +static int boxoban_is_blank_line(const char* line) { + const unsigned char* p = (const unsigned char*)line; + while (*p != '\0') { + if (!isspace(*p)) { + return 0; + } + p++; + } + return 1; +} + +static int boxoban_validate_shape(const BoxobanPuzzleDraft* draft, char* reason, size_t reason_cap) { + if (draft->row_count != BOXOBAN_EXPECTED_ROWS) { + snprintf(reason, reason_cap, "expected %d rows, got %d", BOXOBAN_EXPECTED_ROWS, draft->row_count); + return -1; + } + + for (int r = 0; r < BOXOBAN_EXPECTED_ROWS; r++) { + if (draft->row_lengths[r] != BOXOBAN_EXPECTED_COLS) { + snprintf(reason, reason_cap, "row %d expected %d cols, got %d", + r, BOXOBAN_EXPECTED_COLS, draft->row_lengths[r]); + return -1; + } + } + + reason[0] = '\0'; + return 0; +} + +static int boxoban_encode_and_write_puzzle(const BoxobanPuzzleDraft* draft, FILE* out, char* reason, size_t reason_cap) { + uint8_t agent[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0}; + uint8_t walls[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0}; + uint8_t boxes[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0}; + uint8_t targets[BOXOBAN_EXPECTED_ROWS * BOXOBAN_EXPECTED_COLS] = {0}; + uint8_t meta[BOXOBAN_PUZZLE_META_BYTES] = {0}; + + int agent_x = -1; + int agent_y = -1; + int n_boxes = 0; + int n_targets = 0; + int on_target = 0; + + int idx = 0; + for (int r = 0; r < BOXOBAN_EXPECTED_ROWS; r++) { + for (int c = 0; c < BOXOBAN_EXPECTED_COLS; c++, idx++) { + char ch = draft->rows[r][c]; + 
+ int is_agent = (ch == BOXOBAN_AGENT_CHAR || ch == BOXOBAN_AGENT_ON_TARGET_CHAR); + int is_wall = (ch == BOXOBAN_WALL_CHAR); + int is_box = (ch == BOXOBAN_BOX_CHAR || ch == BOXOBAN_BOX_ON_TARGET_CHAR); + int is_target = (ch == BOXOBAN_TARGET_CHAR || ch == BOXOBAN_BOX_ON_TARGET_CHAR || ch == BOXOBAN_AGENT_ON_TARGET_CHAR); + + if (is_agent) { + if (agent_x != -1) { + snprintf(reason, reason_cap, "Puzzle has multiple agents"); + return -1; + } + agent_x = c; + agent_y = r; + } + + n_boxes += is_box; + n_targets += is_target; + on_target += (is_box && is_target); + + agent[idx] = (uint8_t)is_agent; + walls[idx] = (uint8_t)is_wall; + boxes[idx] = (uint8_t)is_box; + targets[idx] = (uint8_t)is_target; + } + } + + if (agent_x == -1) { + snprintf(reason, reason_cap, "Puzzle has no agent"); + return -1; + } + + meta[0] = (uint8_t)agent_x; + meta[1] = (uint8_t)agent_y; + meta[2] = (uint8_t)n_boxes; + meta[3] = (uint8_t)n_targets; + meta[4] = (uint8_t)on_target; + + if (fwrite(agent, 1, sizeof(agent), out) != sizeof(agent)) return -1; + if (fwrite(walls, 1, sizeof(walls), out) != sizeof(walls)) return -1; + if (fwrite(boxes, 1, sizeof(boxes), out) != sizeof(boxes)) return -1; + if (fwrite(targets, 1, sizeof(targets), out) != sizeof(targets)) return -1; + if (fwrite(meta, 1, sizeof(meta), out) != sizeof(meta)) return -1; + + reason[0] = '\0'; + return 0; +} + +static int boxoban_finalize_puzzle( + BoxobanPuzzleDraft* draft, + FILE* out, + const char* src_path, + size_t* puzzle_idx, + size_t* written_count +) { + char reason[128]; + size_t idx = *puzzle_idx; + (*puzzle_idx)++; + + if (boxoban_validate_shape(draft, reason, sizeof(reason)) != 0) { + fprintf(stdout, "[Boxoban] Skipping malformed puzzle in %s puzzle#%zu: %s\n", src_path, idx, reason); + draft->row_count = 0; + return 0; + } + + if (boxoban_encode_and_write_puzzle(draft, out, reason, sizeof(reason)) != 0) { + if (reason[0] == '\0') { + return -1; + } + fprintf(stdout, "[Boxoban] Skipping malformed puzzle in %s 
puzzle#%zu: %s\n", src_path, idx, reason); + draft->row_count = 0; + return 0; + } + + (*written_count)++; + draft->row_count = 0; + return 0; +} + +static int boxoban_write_bin_from_files( + const char* const* files, + size_t file_count, + const char* out_path, + int verbose, + size_t* out_puzzle_count +) { + FILE* out = fopen(out_path, "wb"); + if (out == NULL) { + return -1; + } + + size_t puzzle_count = 0; + + for (size_t file_idx = 0; file_idx < file_count; file_idx++) { + const char* src_path = files[file_idx]; + FILE* in = fopen(src_path, "r"); + if (in == NULL) { + fclose(out); + return -1; + } + + char* line = NULL; + size_t line_cap = 0; + ssize_t line_len; + BoxobanPuzzleDraft draft; + memset(&draft, 0, sizeof(draft)); + size_t puzzle_idx = 0; + + while ((line_len = getline(&line, &line_cap, in)) != -1) { + if (line_len > 0 && line[line_len - 1] == '\n') { + line[--line_len] = '\0'; + } + + if (line[0] == ';') { + if (draft.row_count > 0) { + if (boxoban_finalize_puzzle(&draft, out, src_path, &puzzle_idx, &puzzle_count) != 0) { + free(line); + fclose(in); + fclose(out); + return -1; + } + } + continue; + } + + if (boxoban_is_blank_line(line)) { + continue; + } + + if (draft.row_count < BOXOBAN_EXPECTED_ROWS) { + int dst_row = draft.row_count; + int copy_len = line_len < BOXOBAN_EXPECTED_COLS ? 
(int)line_len : BOXOBAN_EXPECTED_COLS; + memcpy(draft.rows[dst_row], line, (size_t)copy_len); + draft.row_lengths[dst_row] = (int)line_len; + draft.row_count++; + } + + if (draft.row_count == BOXOBAN_EXPECTED_ROWS) { + if (boxoban_finalize_puzzle(&draft, out, src_path, &puzzle_idx, &puzzle_count) != 0) { + free(line); + fclose(in); + fclose(out); + return -1; + } + } + } + + free(line); + fclose(in); + } + + if (fflush(out) != 0) { + fclose(out); + return -1; + } + + long bytes_written = ftell(out); + fclose(out); + if (bytes_written < 0) { + return -1; + } + + size_t expected = puzzle_count * BOXOBAN_PUZZLE_BYTES; + if ((size_t)bytes_written != expected) { + fprintf(stderr, "Wrong output size: got %ld expected %zu\n", bytes_written, expected); + return -1; + } + + if (verbose) { + fprintf(stdout, "Wrote %zu puzzles to %s\n", puzzle_count, out_path); + } + if (out_puzzle_count != NULL) { + *out_puzzle_count = puzzle_count; + } + return 0; +} + +#endif diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/Crate_Black.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/Crate_Black.png new file mode 100644 index 0000000000..e90a03e0ee Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/Crate_Black.png differ diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Black.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Black.png new file mode 100644 index 0000000000..6d52856da3 Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Black.png differ diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Blue.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Blue.png new file mode 100644 index 0000000000..e5f0be4fd3 Binary files /dev/null and b/pufferlib/ocean/boxoban/sprites_pack/PNG/EndPoint_Blue.png differ diff --git a/pufferlib/ocean/boxoban/sprites_pack/PNG/GroundGravel_Concrete.png b/pufferlib/ocean/boxoban/sprites_pack/PNG/GroundGravel_Concrete.png new file mode 100644 index 0000000000..aa04f80b8d 
class Boxoban(nn.Module):
    """Feed-forward policy/value network for the Boxoban environment.

    Observations are flat ``(B, 400)`` tensors holding four concatenated
    100-cell occupancy planes (10x10 board, plane-major).  The original
    author documents the plane order as ``[agent, target, box, wall]``; the
    encoder itself only relies on the plane-major layout, not on the order.

    Each cell is embedded as::

        cell_vec = pos_embed[cell] + sum_{type present} type_embed[type]

    and the 100 cell vectors are flattened and passed through an MLP.
    """

    def __init__(self, env, hidden_size=128, embed_dim=8):
        """Build the encoder plus the action/value heads for ``env``.

        Args:
            env: environment exposing ``single_action_space``.
            hidden_size: width of the encoder output and of the heads' input.
            embed_dim: size of the per-cell embedding vector.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embed_dim = embed_dim

        action_space = env.single_action_space
        self.is_multidiscrete = isinstance(action_space, pufferlib.spaces.MultiDiscrete)
        self.is_continuous = isinstance(action_space, pufferlib.spaces.Box)

        # Fixed observation layout: 4 entity planes over a 10x10 board.
        self.num_types = 4
        self.num_cells = 100
        self.obs_n = 400

        # Only the weight matrices are used (observations are multi-hot
        # planes, not index tensors), see encode_observations.
        self.type_embed = nn.Embedding(self.num_types, self.embed_dim)
        self.pos_embed = nn.Embedding(self.num_cells, self.embed_dim)

        self.encoder = nn.Sequential(
            pufferlib.pytorch.layer_init(nn.Linear(self.num_cells * self.embed_dim, 2 * hidden_size)),
            nn.GELU(),
            pufferlib.pytorch.layer_init(nn.Linear(2 * hidden_size, hidden_size)),
            nn.GELU(),
            pufferlib.pytorch.layer_init(nn.Linear(hidden_size, hidden_size)),
            nn.GELU(),
        )

        if self.is_continuous:
            self.decoder_mean = pufferlib.pytorch.layer_init(
                nn.Linear(hidden_size, action_space.shape[0]), std=0.01
            )
            self.decoder_logstd = nn.Parameter(torch.zeros(1, action_space.shape[0]))
        else:
            if self.is_multidiscrete:
                # One flat logit vector, split per sub-action in decode_actions.
                self.action_nvec = tuple(action_space.nvec)
                num_atns = sum(self.action_nvec)
            else:
                num_atns = action_space.n
            self.decoder = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, num_atns), std=0.01)

        self.value = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, 1), std=1.0)

    def forward_eval(self, observations, state=None):
        """Return ``(logits, values)`` for a batch of observations."""
        hidden = self.encode_observations(observations, state=state)
        return self.decode_actions(hidden)

    def forward(self, observations, state=None):
        """Alias of :meth:`forward_eval`."""
        return self.forward_eval(observations, state)

    def encode_observations(self, observations, state=None):
        """Encode ``(B, 400)`` observations into ``(B, hidden_size)`` features.

        Observations are cast to the dtype of the embedding weights before
        the matmul, so integer planes as well as float16/bfloat16 inputs work
        regardless of the parameter dtype (mixing dtypes in ``@`` raises
        outside autocast).

        Raises:
            ValueError: if ``observations`` is not of shape ``(B, 400)``.
        """
        if observations.dim() != 2 or observations.shape[1] != self.obs_n:
            raise ValueError(
                f"Expected observations shape (B, {self.obs_n}), got {tuple(observations.shape)}"
            )
        batch = observations.shape[0]
        type_weight = self.type_embed.weight
        planes = observations.to(type_weight.dtype)

        # (B, 400) -> (B, 4, 100) -> (B, 100, 4): one multi-hot row per cell.
        per_cell = planes.view(batch, self.num_types, self.num_cells).permute(0, 2, 1).contiguous()

        # Multi-hot @ embedding table == sum of embeddings of present types.
        type_vec = per_cell @ type_weight  # (B, 100, embed_dim)

        # Position embedding broadcasts over the batch dimension.
        cell_vec = type_vec + self.pos_embed.weight  # (B, 100, embed_dim)

        flat = cell_vec.reshape(batch, self.num_cells * self.embed_dim)
        return self.encoder(flat)

    def decode_actions(self, hidden):
        """Map encoder features to ``(logits, values)``.

        ``logits`` is a tuple of tensors for multi-discrete action spaces, a
        ``torch.distributions.Normal`` for continuous ones, and a plain
        tensor for discrete ones.
        """
        if self.is_multidiscrete:
            logits = self.decoder(hidden).split(self.action_nvec, dim=1)
        elif self.is_continuous:
            mean = self.decoder_mean(hidden)
            logstd = self.decoder_logstd.expand_as(mean)
            logits = torch.distributions.Normal(mean, torch.exp(logstd))
        else:
            logits = self.decoder(hidden)

        values = self.value(hidden)
        return logits, values