2525# define OZAKI_DEV_ALLOC (PTR , SIZE ) ( \
2626 (NULL != pool) \
2727 ? ((*(PTR) = libxs_malloc(pool, SIZE, LIBXS_MALLOC_NATIVE)) != NULL ? EXIT_SUCCESS : EXIT_FAILURE) \
28- : ((* (PTR) = libxstream_memdev_allocate( SIZE)) != NULL ? EXIT_SUCCESS : EXIT_FAILURE ))
28+ : libxstream_mem_allocate((void**) (PTR), SIZE))
2929# define OZAKI_DEV_FREE (PTR ) do { \
3030 if (NULL != (PTR)) { \
31- if (NULL != pool) libxs_free(PTR); else libxstream_memdev_deallocate (PTR); \
31+ if (NULL != pool) libxs_free(PTR); else libxstream_mem_deallocate (PTR); \
3232 } \
3333} while (0)
3434#else
3535# define OZAKI_DEV_ALLOC (PTR , SIZE ) \
36- ((* (PTR) = libxstream_memdev_allocate( SIZE)) != NULL ? EXIT_SUCCESS : EXIT_FAILURE )
36+ libxstream_mem_allocate((void**) (PTR), SIZE)
3737# define OZAKI_DEV_FREE (PTR ) do { \
38- if (NULL != (PTR)) libxstream_memdev_deallocate (PTR); \
38+ if (NULL != (PTR)) libxstream_mem_deallocate (PTR); \
3939} while (0)
4040#endif
4141
5151/* Wrapped allocator for libxs_malloc_xpool: delegates to device allocator. */
5252static void * ozaki_dev_allocate (size_t size , const void * extra )
5353{
54+ void * result = NULL ;
5455 (void )extra ;
55- return libxstream_memdev_allocate (size );
56+ libxstream_mem_allocate (& result , size );
57+ return result ;
5658}
5759
5860/* Wrapped deallocator: syncs all streams before freeing device memory.
@@ -63,7 +65,7 @@ static void ozaki_dev_deallocate(void* pointer, const void* extra)
6365 if (NULL != ctx -> stream ) libxstream_stream_sync (ctx -> stream );
6466 if (NULL != ctx -> stream_a ) libxstream_stream_sync (ctx -> stream_a );
6567 if (NULL != ctx -> stream_b ) libxstream_stream_sync (ctx -> stream_b );
66- libxstream_memdev_deallocate (pointer );
68+ libxstream_mem_deallocate (pointer );
6769}
6870#endif
6971
@@ -457,9 +459,9 @@ int ozaki_gemm(ozaki_context_t* ctx, libxstream_stream_t* stream,
457459 if (EXIT_SUCCESS == result ) result = OZAKI_DEV_ALLOC (& d_b , b_nbytes );
458460 if (EXIT_SUCCESS == result ) result = OZAKI_DEV_ALLOC (& d_c , c_nbytes );
459461 /* Overlapped H2D: A via stream_a, B via stream_b, C via main */
460- if (EXIT_SUCCESS == result ) result = libxstream_memcpy_h2d (a , d_a , a_nbytes , stream_a );
461- if (EXIT_SUCCESS == result ) result = libxstream_memcpy_h2d (b , d_b , b_nbytes , stream_b );
462- if (EXIT_SUCCESS == result ) result = libxstream_memcpy_h2d (c , d_c , c_nbytes , stream );
462+ if (EXIT_SUCCESS == result ) result = libxstream_mem_copy_h2d (a , d_a , a_nbytes , stream_a );
463+ if (EXIT_SUCCESS == result ) result = libxstream_mem_copy_h2d (b , d_b , b_nbytes , stream_b );
464+ if (EXIT_SUCCESS == result ) result = libxstream_mem_copy_h2d (c , d_c , c_nbytes , stream );
463465 }
464466
465467 /* Pre-allocate double-buffered preprocessing buffers (max batch size) */
@@ -640,7 +642,7 @@ int ozaki_gemm(ozaki_context_t* ctx, libxstream_stream_t* stream,
640642 }
641643
642644 /* Read back result C; caller is responsible for syncing the stream */
643- if (EXIT_SUCCESS == result ) result = libxstream_memcpy_d2h (d_c , c , c_nbytes , stream );
645+ if (EXIT_SUCCESS == result ) result = libxstream_mem_copy_d2h (d_c , c , c_nbytes , stream );
644646
645647 /* Return buffers to pool (no deallocation, no sync needed) or free directly */
646648 { int s ;
0 commit comments