Skip to content

Commit ed298ce

Browse files
committed
Finally revised API after learning/uses
1 parent bb2e1ae commit ed298ce

11 files changed

Lines changed: 117 additions & 110 deletions

README.md

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ typedef struct libxstream_event_t libxstream_event_t;
1919
```C
2020
int libxstream_init(void);
2121
int libxstream_finalize(void);
22-
int libxstream_get_ndevices(int* ndevices);
23-
int libxstream_set_active_device(int device_id);
24-
int libxstream_device_synchronize(void);
22+
int libxstream_device_count(int* ndevices);
23+
int libxstream_device_set_active(int device_id);
24+
int libxstream_device_sync(void);
2525
```
2626
2727
### Streams
@@ -40,22 +40,22 @@ int libxstream_event_create(libxstream_event_t** event_p);
4040
int libxstream_event_destroy(libxstream_event_t* event);
4141
int libxstream_event_record(libxstream_event_t* event, libxstream_stream_t* stream);
4242
int libxstream_event_query(libxstream_event_t* event, libxstream_bool_t* has_occurred);
43-
int libxstream_event_synchronize(libxstream_event_t* event);
43+
int libxstream_event_sync(libxstream_event_t* event);
4444
```
4545
4646
### Memory
4747
4848
Device and host memory allocation, transfers (H2D, D2H, D2D), and initialization. Memory pointers remain untyped (`void*`); stream parameters use the opaque stream type.
4949
5050
```C
51-
void* libxstream_memdev_allocate(size_t nbytes);
52-
void libxstream_memdev_deallocate(void* dev_mem);
53-
void* libxstream_memhst_allocate(size_t nbytes, libxstream_stream_t* stream);
54-
int libxstream_memhst_deallocate(void* host_mem, libxstream_stream_t* stream);
55-
int libxstream_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, libxstream_stream_t* stream);
56-
int libxstream_memcpy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, libxstream_stream_t* stream);
57-
int libxstream_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbytes, libxstream_stream_t* stream);
58-
int libxstream_memset_zero(void* dev_mem, size_t offset, size_t nbytes, libxstream_stream_t* stream);
51+
int libxstream_mem_allocate(void** dev_mem, size_t nbytes);
52+
int libxstream_mem_deallocate(void* dev_mem);
53+
int libxstream_mem_host_allocate(void** host_mem, size_t nbytes, libxstream_stream_t* stream);
54+
int libxstream_mem_host_deallocate(void* host_mem, libxstream_stream_t* stream);
55+
int libxstream_mem_copy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, libxstream_stream_t* stream);
56+
int libxstream_mem_copy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, libxstream_stream_t* stream);
57+
int libxstream_mem_copy_d2d(const void* devmem_src, void* devmem_dst, size_t nbytes, libxstream_stream_t* stream);
58+
int libxstream_mem_zero(void* dev_mem, size_t offset, size_t nbytes, libxstream_stream_t* stream);
5959
```
6060

6161
### DBCSR Compatibility

include/libxstream.h

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,9 @@ int libxstream_init(void);
2626
int libxstream_finalize(void);
2727

2828
/** devices */
29-
int libxstream_get_ndevices(int* ndevices);
30-
int libxstream_set_active_device(int device_id);
31-
int libxstream_device_synchronize(void);
29+
int libxstream_device_count(int* ndevices);
30+
int libxstream_device_set_active(int device_id);
31+
int libxstream_device_sync(void);
3232

3333
/** streams */
3434
int libxstream_stream_priority_range(int* least, int* greatest);
@@ -44,19 +44,19 @@ int libxstream_event_create(libxstream_event_t** event_p);
4444
int libxstream_event_destroy(libxstream_event_t* event);
4545
int libxstream_event_record(libxstream_event_t* event, libxstream_stream_t* stream);
4646
int libxstream_event_query(libxstream_event_t* event, libxstream_bool_t* has_occurred);
47-
int libxstream_event_synchronize(libxstream_event_t* event);
47+
int libxstream_event_sync(libxstream_event_t* event);
4848

4949
/** memory */
50-
void* libxstream_memdev_allocate(size_t nbytes);
51-
void libxstream_memdev_deallocate(void* dev_mem);
52-
int libxstream_memdev_set_ptr(void** dev_mem, void* other, size_t lb);
53-
int libxstream_memdev_info(size_t* mem_free, size_t* mem_total);
54-
void* libxstream_memhst_allocate(size_t nbytes, libxstream_stream_t* stream);
55-
int libxstream_memhst_deallocate(void* host_mem, libxstream_stream_t* stream);
56-
int libxstream_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, libxstream_stream_t* stream);
57-
int libxstream_memcpy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, libxstream_stream_t* stream);
58-
int libxstream_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbytes, libxstream_stream_t* stream);
59-
int libxstream_memset_zero(void* dev_mem, size_t offset, size_t nbytes, libxstream_stream_t* stream);
50+
int libxstream_mem_allocate(void** dev_mem, size_t nbytes);
51+
int libxstream_mem_deallocate(void* dev_mem);
52+
int libxstream_mem_offset(void** dev_mem, void* other, size_t lb);
53+
int libxstream_mem_info(size_t* mem_free, size_t* mem_total);
54+
int libxstream_mem_host_allocate(void** host_mem, size_t nbytes, libxstream_stream_t* stream);
55+
int libxstream_mem_host_deallocate(void* host_mem, libxstream_stream_t* stream);
56+
int libxstream_mem_copy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, libxstream_stream_t* stream);
57+
int libxstream_mem_copy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, libxstream_stream_t* stream);
58+
int libxstream_mem_copy_d2d(const void* devmem_src, void* devmem_dst, size_t nbytes, libxstream_stream_t* stream);
59+
int libxstream_mem_zero(void* dev_mem, size_t offset, size_t nbytes, libxstream_stream_t* stream);
6060

6161
#if defined(__cplusplus)
6262
}

include/libxstream_opencl.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,7 @@ struct libxstream_event_t {
192192
cl_event cl_evt;
193193
};
194194

195-
/** Settings updated during libxstream_set_active_device. */
195+
/** Settings updated during libxstream_device_set_active. */
196196
typedef struct libxstream_opencl_device_t {
197197
/** Activated device context. */
198198
cl_context context;
@@ -255,7 +255,7 @@ typedef enum libxstream_opencl_atomic_fp_t {
255255

256256
/**
257257
* Settings discovered/setup during libxstream_init (independent of the device)
258-
* and settings updated during libxstream_set_active_device (devinfo).
258+
* and settings updated during libxstream_device_set_active (devinfo).
259259
*/
260260
typedef struct libxstream_opencl_config_t {
261261
/** Table of ordered viable/discovered devices (matching criterion). */
@@ -332,7 +332,7 @@ int libxstream_opencl_info_devptr(
332332
const libxstream_opencl_stream_t* libxstream_opencl_stream(libxs_lock_t* lock, int thread_id);
333333
/** Determines default-stream (see libxstream_opencl_device_t::stream). */
334334
const libxstream_opencl_stream_t* libxstream_opencl_stream_default(void);
335-
/** Like libxstream_memset_zero, but supporting an arbitrary value used as initialization pattern. */
335+
/** Like libxstream_mem_zero, but supporting an arbitrary value used as initialization pattern. */
336336
int libxstream_opencl_memset(void* dev_mem, int value, size_t offset, size_t nbytes, libxstream_stream_t* stream);
337337
/** Amount of device memory; local memory is only non-zero if separate from global. */
338338
int libxstream_opencl_info_devmem(cl_device_id device, size_t* mem_free, size_t* mem_total, size_t* mem_local, int* mem_unified);
@@ -352,7 +352,7 @@ int libxstream_opencl_device_level(
352352
int libxstream_opencl_device_ext(cl_device_id device, const char* const extnames[], int num_exts);
353353
/** Create context for given device. */
354354
int libxstream_opencl_create_context(cl_device_id device_id, cl_context* context);
355-
/** Internal variant of libxstream_set_active_device. */
355+
/** Internal variant of libxstream_device_set_active. */
356356
int libxstream_opencl_set_active_device(libxs_lock_t* lock, int device_id);
357357
/** Assemble flags to support atomic operations. */
358358
int libxstream_opencl_flags_atomics(const libxstream_opencl_device_t* devinfo, libxstream_opencl_atomic_fp_t kind,
@@ -373,7 +373,7 @@ int libxstream_opencl_kernel_flags(const char build_params[], const char build_o
373373
int libxstream_opencl_kernel(size_t source_kind, const char source[], const char kernel_name[], const char build_params[],
374374
const char build_options[], const char try_build_options[], int* try_ok, const char* const extnames[], size_t num_exts,
375375
cl_kernel* kernel);
376-
/** Per-thread variant of libxstream_device_synchronize. */
376+
/** Per-thread variant of libxstream_device_sync. */
377377
int libxstream_opencl_device_synchronize(libxs_lock_t* lock, int thread_id);
378378
/** To support USM, call this function for pointer arguments instead of clSetKernelArg. */
379379
int libxstream_opencl_set_kernel_ptr(cl_kernel kernel, cl_uint arg_index, const void* arg_value);

samples/ozaki/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ eliminating per-call allocation overhead for repeated GEMM calls. With the pool
8787
active, `ozaki_gemm` returns without synchronizing — the caller is responsible
8888
for syncing the stream. On the rare grow path (larger problem size), the
8989
wrapped deallocator syncs all streams before reallocating. The pool falls back
90-
to direct `libxstream_memdev_allocate` transparently if USM/SVM is not supported.
90+
to direct `libxstream_mem_allocate` transparently if USM/SVM is not supported.
9191

9292
## Build
9393

samples/ozaki/ozaki_bench.c

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -77,9 +77,9 @@ int main(int argc, char* argv[])
7777
if (EXIT_SUCCESS == result) {
7878
int ndevices = 0;
7979
initialized = 1;
80-
result = libxstream_get_ndevices(&ndevices);
80+
result = libxstream_device_count(&ndevices);
8181
if (EXIT_SUCCESS == result && 0 < ndevices) {
82-
result = libxstream_set_active_device(0);
82+
result = libxstream_device_set_active(0);
8383
}
8484
else if (EXIT_SUCCESS == result) {
8585
fprintf(stderr, "ERROR: no ACC device found\n");
@@ -136,11 +136,11 @@ int main(int argc, char* argv[])
136136
if (EXIT_SUCCESS == result) {
137137
const int a_rows = (0 == ta ? M : K), a_cols = (0 == ta ? K : M);
138138
const int b_rows = (0 == tb ? K : N), b_cols = (0 == tb ? N : K);
139-
a = libxstream_memhst_allocate((size_t)lda * a_cols * elem_size, stream);
140-
if (NULL != a) b = libxstream_memhst_allocate((size_t)ldb * b_cols * elem_size, stream);
141-
if (NULL != b) c_oz = libxstream_memhst_allocate((size_t)ldc * N * elem_size, stream);
142-
if (NULL != c_oz) c_ref = libxstream_memhst_allocate((size_t)ldc * N * elem_size, stream);
143-
if (NULL == a || NULL == b || NULL == c_oz || NULL == c_ref) {
139+
result = libxstream_mem_host_allocate((void**)&a, (size_t)lda * a_cols * elem_size, stream);
140+
if (EXIT_SUCCESS == result) result = libxstream_mem_host_allocate((void**)&b, (size_t)ldb * b_cols * elem_size, stream);
141+
if (EXIT_SUCCESS == result) result = libxstream_mem_host_allocate((void**)&c_oz, (size_t)ldc * N * elem_size, stream);
142+
if (EXIT_SUCCESS == result) result = libxstream_mem_host_allocate((void**)&c_ref, (size_t)ldc * N * elem_size, stream);
143+
if (EXIT_SUCCESS != result) {
144144
fprintf(stderr, "ERROR: out of memory\n");
145145
result = EXIT_FAILURE;
146146
}
@@ -219,10 +219,10 @@ int main(int argc, char* argv[])
219219
}
220220

221221
if (0 != initialized) {
222-
if (NULL != a) libxstream_memhst_deallocate(a, stream);
223-
if (NULL != b) libxstream_memhst_deallocate(b, stream);
224-
if (NULL != c_oz) libxstream_memhst_deallocate(c_oz, stream);
225-
if (NULL != c_ref) libxstream_memhst_deallocate(c_ref, stream);
222+
if (NULL != a) libxstream_mem_host_deallocate(a, stream);
223+
if (NULL != b) libxstream_mem_host_deallocate(b, stream);
224+
if (NULL != c_oz) libxstream_mem_host_deallocate(c_oz, stream);
225+
if (NULL != c_ref) libxstream_mem_host_deallocate(c_ref, stream);
226226
if (NULL != stream) libxstream_stream_destroy(stream);
227227
ozaki_destroy(&ctx);
228228
libxstream_finalize();

samples/ozaki/ozaki_opencl.c

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -25,17 +25,17 @@
2525
# define OZAKI_DEV_ALLOC(PTR, SIZE) ( \
2626
(NULL != pool) \
2727
? ((*(PTR) = libxs_malloc(pool, SIZE, LIBXS_MALLOC_NATIVE)) != NULL ? EXIT_SUCCESS : EXIT_FAILURE) \
28-
: ((*(PTR) = libxstream_memdev_allocate(SIZE)) != NULL ? EXIT_SUCCESS : EXIT_FAILURE))
28+
: libxstream_mem_allocate((void**)(PTR), SIZE))
2929
# define OZAKI_DEV_FREE(PTR) do { \
3030
if (NULL != (PTR)) { \
31-
if (NULL != pool) libxs_free(PTR); else libxstream_memdev_deallocate(PTR); \
31+
if (NULL != pool) libxs_free(PTR); else libxstream_mem_deallocate(PTR); \
3232
} \
3333
} while (0)
3434
#else
3535
# define OZAKI_DEV_ALLOC(PTR, SIZE) \
36-
((*(PTR) = libxstream_memdev_allocate(SIZE)) != NULL ? EXIT_SUCCESS : EXIT_FAILURE)
36+
libxstream_mem_allocate((void**)(PTR), SIZE)
3737
# define OZAKI_DEV_FREE(PTR) do { \
38-
if (NULL != (PTR)) libxstream_memdev_deallocate(PTR); \
38+
if (NULL != (PTR)) libxstream_mem_deallocate(PTR); \
3939
} while (0)
4040
#endif
4141

@@ -51,8 +51,10 @@
5151
/* Wrapped allocator for libxs_malloc_xpool: delegates to device allocator. */
5252
static void* ozaki_dev_allocate(size_t size, const void* extra)
5353
{
54+
void* result = NULL;
5455
(void)extra;
55-
return libxstream_memdev_allocate(size);
56+
libxstream_mem_allocate(&result, size);
57+
return result;
5658
}
5759

5860
/* Wrapped deallocator: syncs all streams before freeing device memory.
@@ -63,7 +65,7 @@ static void ozaki_dev_deallocate(void* pointer, const void* extra)
6365
if (NULL != ctx->stream) libxstream_stream_sync(ctx->stream);
6466
if (NULL != ctx->stream_a) libxstream_stream_sync(ctx->stream_a);
6567
if (NULL != ctx->stream_b) libxstream_stream_sync(ctx->stream_b);
66-
libxstream_memdev_deallocate(pointer);
68+
libxstream_mem_deallocate(pointer);
6769
}
6870
#endif
6971

@@ -457,9 +459,9 @@ int ozaki_gemm(ozaki_context_t* ctx, libxstream_stream_t* stream,
457459
if (EXIT_SUCCESS == result) result = OZAKI_DEV_ALLOC(&d_b, b_nbytes);
458460
if (EXIT_SUCCESS == result) result = OZAKI_DEV_ALLOC(&d_c, c_nbytes);
459461
/* Overlapped H2D: A via stream_a, B via stream_b, C via main */
460-
if (EXIT_SUCCESS == result) result = libxstream_memcpy_h2d(a, d_a, a_nbytes, stream_a);
461-
if (EXIT_SUCCESS == result) result = libxstream_memcpy_h2d(b, d_b, b_nbytes, stream_b);
462-
if (EXIT_SUCCESS == result) result = libxstream_memcpy_h2d(c, d_c, c_nbytes, stream);
462+
if (EXIT_SUCCESS == result) result = libxstream_mem_copy_h2d(a, d_a, a_nbytes, stream_a);
463+
if (EXIT_SUCCESS == result) result = libxstream_mem_copy_h2d(b, d_b, b_nbytes, stream_b);
464+
if (EXIT_SUCCESS == result) result = libxstream_mem_copy_h2d(c, d_c, c_nbytes, stream);
463465
}
464466

465467
/* Pre-allocate double-buffered preprocessing buffers (max batch size) */
@@ -640,7 +642,7 @@ int ozaki_gemm(ozaki_context_t* ctx, libxstream_stream_t* stream,
640642
}
641643

642644
/* Read back result C; caller is responsible for syncing the stream */
643-
if (EXIT_SUCCESS == result) result = libxstream_memcpy_d2h(d_c, c, c_nbytes, stream);
645+
if (EXIT_SUCCESS == result) result = libxstream_mem_copy_d2h(d_c, c, c_nbytes, stream);
644646

645647
/* Return buffers to pool (no deallocation, no sync needed) or free directly */
646648
{ int s;

src/libxstream.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -756,15 +756,15 @@ int libxstream_opencl_use_cmem(const libxstream_opencl_device_t* devinfo) {
756756
}
757757

758758

759-
int libxstream_get_ndevices(int* ndevices) {
759+
int libxstream_device_count(int* ndevices) {
760760
int result;
761761
# if defined(__DBCSR_ACC) /* lazy initialization */
762-
/* DBCSR calls libxstream_get_ndevices before calling libxstream_init. */
762+
/* DBCSR calls libxstream_device_count before calling libxstream_init. */
763763
result = libxstream_init();
764764
if (EXIT_SUCCESS == result)
765765
# endif
766766
{
767-
if (NULL != ndevices && 0 != libxstream_opencl_config.ndevices) {
767+
if (NULL != ndevices) {
768768
*ndevices = (0 < libxstream_opencl_config.ndevices ? libxstream_opencl_config.ndevices : 0);
769769
result = EXIT_SUCCESS;
770770
}
@@ -1212,7 +1212,7 @@ int libxstream_opencl_set_active_device(libxs_lock_t* lock, int device_id) {
12121212
}
12131213

12141214

1215-
int libxstream_set_active_device(int device_id) {
1215+
int libxstream_device_set_active(int device_id) {
12161216
int result = EXIT_SUCCESS;
12171217
if (0 <= device_id) {
12181218
# if defined(__DBCSR_ACC) && defined(__OFFLOAD_OPENCL)

0 commit comments

Comments
 (0)