diff --git a/src/audio/module_adapter/module_adapter.c b/src/audio/module_adapter/module_adapter.c
index db1720b6f6c9..3b6b69b6cff9 100644
--- a/src/audio/module_adapter/module_adapter.c
+++ b/src/audio/module_adapter/module_adapter.c
@@ -54,7 +54,7 @@ struct comp_dev *module_adapter_new(const struct comp_driver *drv,
 #if CONFIG_MM_DRV
 #define PAGE_SZ	CONFIG_MM_DRV_PAGE_SIZE
 #else
-#include
+#include
 #define PAGE_SZ	HOST_PAGE_SIZE
 #endif

diff --git a/src/include/sof/audio/component_ext.h b/src/include/sof/audio/component_ext.h
index f0a1ad7e8512..d2bbf87a7764 100644
--- a/src/include/sof/audio/component_ext.h
+++ b/src/include/sof/audio/component_ext.h
@@ -158,9 +158,9 @@ static inline int comp_trigger_local(struct comp_dev *dev, int cmd)
 	/* schedule or cancel task */
 	switch (cmd) {
 	case COMP_TRIGGER_START:
-	case COMP_TRIGGER_RELEASE:
 		ret = schedule_task(dev->task, 0, dev->period);
 		break;
+	case COMP_TRIGGER_RELEASE:
 	case COMP_TRIGGER_XRUN:
 	case COMP_TRIGGER_PAUSE:
 	case COMP_TRIGGER_STOP:
diff --git a/src/include/sof/lib/fast-get.h b/src/include/sof/lib/fast-get.h
index 36232cfbb3d6..fa34ecea128d 100644
--- a/src/include/sof/lib/fast-get.h
+++ b/src/include/sof/lib/fast-get.h
@@ -12,17 +12,7 @@

 struct k_heap;

-#if defined(__ZEPHYR__) && defined(CONFIG_SOF_FULL_ZEPHYR_APPLICATION)
-#include
-
-__syscall const void *fast_get(struct k_heap *heap, const void * const dram_ptr, size_t size);
-__syscall void fast_put(struct k_heap *heap, const void *sram_ptr);
-#include
-#else
-const void *z_impl_fast_get(struct k_heap *heap, const void * const dram_ptr, size_t size);
-void z_impl_fast_put(struct k_heap *heap, const void *sram_ptr);
-#define fast_get z_impl_fast_get
-#define fast_put z_impl_fast_put
-#endif /* __ZEPHYR__ */
+const void *fast_get(struct k_heap *heap, const void * const dram_ptr, size_t size);
+void fast_put(struct k_heap *heap, const void *sram_ptr);

 #endif /* __SOF_LIB_FAST_GET_H__ */
diff --git a/src/lib/objpool.c b/src/lib/objpool.c
index 6afc1f94283a..614933c24d30 100644
--- a/src/lib/objpool.c
+++ b/src/lib/objpool.c
@@ -88,6 +88,7 @@ void *objpool_alloc(struct objpool_head *head, size_t size, uint32_t flags)
 	unsigned int new_n;

 	if (list_is_empty(&head->list)) {
+		head->flags = flags;
 		new_n = 2;
 	} else {
 		/* Check the last one */
diff --git a/src/schedule/zephyr_dp_schedule.h b/src/schedule/zephyr_dp_schedule.h
index 83880bd02b4e..0891cc1e4342 100644
--- a/src/schedule/zephyr_dp_schedule.h
+++ b/src/schedule/zephyr_dp_schedule.h
@@ -25,7 +25,9 @@ struct scheduler_dp_data {

 enum sof_dp_part_type {
 	SOF_DP_PART_HEAP,
+	SOF_DP_PART_HEAP_CACHE,
 	SOF_DP_PART_CFG,
+	SOF_DP_PART_CFG_CACHE,
 	SOF_DP_PART_TYPE_COUNT,
 };

diff --git a/src/schedule/zephyr_dp_schedule_application.c b/src/schedule/zephyr_dp_schedule_application.c
index e001b21ea423..9a9d19f0c722 100644
--- a/src/schedule/zephyr_dp_schedule_application.c
+++ b/src/schedule/zephyr_dp_schedule_application.c
@@ -392,7 +392,9 @@ void scheduler_dp_domain_free(struct processing_module *pmod)
 	struct task_dp_pdata *pdata = pmod->dev->task->priv_data;

 	k_mem_domain_remove_partition(mdom, pdata->mpart + SOF_DP_PART_HEAP);
+	k_mem_domain_remove_partition(mdom, pdata->mpart + SOF_DP_PART_HEAP_CACHE);
 	k_mem_domain_remove_partition(mdom, pdata->mpart + SOF_DP_PART_CFG);
+	k_mem_domain_remove_partition(mdom, pdata->mpart + SOF_DP_PART_CFG_CACHE);

 	pmod->mdom = NULL;
 	objpool_free(&dp_mdom_head, mdom);
@@ -504,8 +506,11 @@ int scheduler_dp_task_init(struct task **task, const struct sof_uuid_entry *uid,
 	struct k_mem_domain *mdom = objpool_alloc(&dp_mdom_head, sizeof(*mdom),
 						   SOF_MEM_FLAG_COHERENT);
-	if (!mdom)
+	if (!mdom) {
+		tr_err(&dp_tr, "objpool allocation failed");
+		ret = -ENOMEM;
 		goto e_thread;
+	}

 	mod->mdom = mdom;

@@ -522,12 +527,22 @@ int scheduler_dp_task_init(struct task **task, const struct sof_uuid_entry *uid,
 		.size = size,
 		.attr = K_MEM_PARTITION_P_RW_U_RW,
 	};
+	pdata->mpart[SOF_DP_PART_HEAP_CACHE] = (struct k_mem_partition){
+		.start = (uintptr_t)sys_cache_cached_ptr_get((void *)start),
+		.size = size,
+		.attr = K_MEM_PARTITION_P_RW_U_RW | XTENSA_MMU_CACHED_WB,
+	};
 	/* Host mailbox partition for additional IPC parameters: read-only */
 	pdata->mpart[SOF_DP_PART_CFG] = (struct k_mem_partition){
-		.start = (uintptr_t)MAILBOX_HOSTBOX_BASE,
+		.start = (uintptr_t)sys_cache_uncached_ptr_get((void *)MAILBOX_HOSTBOX_BASE),
 		.size = 4096,
 		.attr = K_MEM_PARTITION_P_RO_U_RO,
 	};
+	pdata->mpart[SOF_DP_PART_CFG_CACHE] = (struct k_mem_partition){
+		.start = (uintptr_t)MAILBOX_HOSTBOX_BASE,
+		.size = 4096,
+		.attr = K_MEM_PARTITION_P_RO_U_RO | XTENSA_MMU_CACHED_WB,
+	};

 	for (pidx = 0; pidx < SOF_DP_PART_TYPE_COUNT; pidx++) {
 		ret = k_mem_domain_add_partition(mdom, pdata->mpart + pidx);
diff --git a/tools/topology/topology2/development/tplg-targets.cmake b/tools/topology/topology2/development/tplg-targets.cmake
index 7986eb2bad11..b382dc1b4bc1 100644
--- a/tools/topology/topology2/development/tplg-targets.cmake
+++ b/tools/topology/topology2/development/tplg-targets.cmake
@@ -60,9 +60,9 @@ NHLT_BIN=nhlt-sof-lnl-nocodec-fpga-4ch.bin,PASSTHROUGH=true,DMIC_IO_CLK=19200000
 # HDA topology with passthrough analog codec pipelines using CHAIN_DMA
 "sof-hda-generic\;sof-hda-passthrough-chain-dma\;HDA_CONFIG=passthrough,CODEC_HDA_CHAIN_DMA=true"

-# SSP topology for PTL
+# SSP topology for PTL, includes Data Processing SRC
 "cavs-nocodec\;sof-ptl-nocodec\;PLATFORM=ptl,NUM_DMICS=4,PDM1_MIC_A_ENABLE=1,PDM1_MIC_B_ENABLE=1,\
-PREPROCESS_PLUGINS=nhlt,NHLT_BIN=nhlt-sof-ptl-nocodec.bin"
+PREPROCESS_PLUGINS=nhlt,NHLT_BIN=nhlt-sof-ptl-nocodec.bin,SRC_DOMAIN=DP"

 # SSP topology for PTL with 96 kHz DMIC
 "cavs-nocodec\;sof-ptl-nocodec-dmic-4ch-96k\;PLATFORM=ptl,NUM_DMICS=4,PDM1_MIC_A_ENABLE=1,PDM1_MIC_B_ENABLE=1,\
@@ -310,9 +310,9 @@ PREPROCESS_PLUGINS=nhlt,NHLT_BIN=nhlt-sof-mtl-nocodec.bin,SRC_DOMAIN=DP"
 # SSP test topology for Data Processing SRC on LNL
 "cavs-nocodec\;sof-lnl-nocodec-dp-test\;PLATFORM=lnl,NUM_DMICS=4,PDM1_MIC_A_ENABLE=1,PDM1_MIC_B_ENABLE=1,\
 PREPROCESS_PLUGINS=nhlt,NHLT_BIN=nhlt-sof-lnl-nocodec.bin,SRC_DOMAIN=DP"
-# SSP test topology for Data Processing SRC on PTL
+# SSP test topology for PTL without Data Processing SRC
 "cavs-nocodec\;sof-ptl-nocodec-dp-test\;PLATFORM=ptl,NUM_DMICS=4,PDM1_MIC_A_ENABLE=1,PDM1_MIC_B_ENABLE=1,\
-PREPROCESS_PLUGINS=nhlt,NHLT_BIN=nhlt-sof-ptl-nocodec.bin,SRC_DOMAIN=DP"
+PREPROCESS_PLUGINS=nhlt,NHLT_BIN=nhlt-sof-ptl-nocodec.bin"

 # SSP test topology for Data Processing on core 1 SRC for MTL
 "cavs-nocodec\;sof-mtl-nocodec-dp-core-test\;PLATFORM=mtl,NUM_DMICS=4,PDM1_MIC_A_ENABLE=1,PDM1_MIC_B_ENABLE=1,\
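Note for readers: with the fast-get.h change above, fast_get() and fast_put() are plain functions rather than __syscall wrappers, and the fast-get.c rework below keeps the same prototypes while adding the userspace handling. The following is only a minimal caller-side sketch of how a module might use the API; my_module_prepare(), my_module_reset() and coeff_table[] are hypothetical names, not part of this patch.

#include <errno.h>
#include <stdint.h>
#include <sof/lib/fast-get.h>

/* Hypothetical coefficient table kept in DRAM by a processing module. */
static const int32_t coeff_table[256];

static const int32_t *coeffs_sram;

/* Take a reference: fast_get() returns an SRAM copy of the DRAM data. */
static int my_module_prepare(struct k_heap *heap)
{
	coeffs_sram = fast_get(heap, coeff_table, sizeof(coeff_table));

	return coeffs_sram ? 0 : -ENOMEM;
}

/* Drop the reference; the copy is freed once the last user calls fast_put(). */
static void my_module_reset(struct k_heap *heap)
{
	fast_put(heap, coeffs_sram);
	coeffs_sram = NULL;
}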
diff --git a/zephyr/lib/fast-get.c b/zephyr/lib/fast-get.c
index 4f08d111e8b9..114078d5ef5f 100644
--- a/zephyr/lib/fast-get.c
+++ b/zephyr/lib/fast-get.c
@@ -19,6 +19,9 @@ struct sof_fast_get_entry {
 	const void *dram_ptr;
 	void *sram_ptr;
+#if CONFIG_USERSPACE
+	struct k_thread *thread;
+#endif
 	size_t size;
 	unsigned int refcount;
 };
@@ -81,16 +84,57 @@ static struct sof_fast_get_entry *fast_get_find_entry(struct sof_fast_get_data *
 	return NULL;
 }

-const void *z_impl_fast_get(struct k_heap *heap, const void *dram_ptr, size_t size)
+#if CONFIG_MM_DRV
+#define PAGE_SZ			CONFIG_MM_DRV_PAGE_SIZE
+#define FAST_GET_MAX_COPY_SIZE	(PAGE_SZ / 2)
+#else
+#include
+#define PAGE_SZ			HOST_PAGE_SIZE
+#define FAST_GET_MAX_COPY_SIZE	0
+#endif
+
+#if CONFIG_USERSPACE
+static int fast_get_access_grant(k_tid_t thread, void *addr, size_t size)
+{
+	struct k_mem_partition part = {
+		.start = (uintptr_t)addr,
+		.size = ALIGN_UP(size, CONFIG_MM_DRV_PAGE_SIZE),
+		.attr = K_MEM_PARTITION_P_RO_U_RO | XTENSA_MMU_CACHED_WB,
+	};
+
+	LOG_DBG("add %#zx @ %p", part.size, addr);
+	return k_mem_domain_add_partition(thread->mem_domain_info.mem_domain, &part);
+}
+#endif /* CONFIG_USERSPACE */
+
+const void *fast_get(struct k_heap *heap, const void *dram_ptr, size_t size)
 {
 	struct sof_fast_get_data *data = &fast_get_data;
+	uint32_t alloc_flags = SOF_MEM_FLAG_USER;
 	struct sof_fast_get_entry *entry;
+	size_t alloc_size, alloc_align;
+	const void *alloc_ptr;
 	k_spinlock_key_t key;
 	void *ret;

 	key = k_spin_lock(&data->lock);
+
+	if (IS_ENABLED(CONFIG_USERSPACE) && size > FAST_GET_MAX_COPY_SIZE) {
+		alloc_size = ALIGN_UP(size, PAGE_SZ);
+		alloc_align = PAGE_SZ;
+		alloc_flags |= SOF_MEM_FLAG_LARGE_BUFFER;
+	} else {
+		alloc_size = size;
+		alloc_align = PLATFORM_DCACHE_ALIGN;
+	}
+
+	if (size > FAST_GET_MAX_COPY_SIZE || !IS_ENABLED(CONFIG_USERSPACE))
+		alloc_ptr = dram_ptr;
+	else
+		/* When userspace is enabled only share large buffers */
+		alloc_ptr = NULL;
+
 	do {
-		entry = fast_get_find_entry(data, dram_ptr);
+		entry = fast_get_find_entry(data, alloc_ptr);
 		if (!entry) {
 			if (fast_get_realloc(data)) {
 				ret = NULL;
@@ -99,14 +143,38 @@ const void *z_impl_fast_get(struct k_heap *heap, const void *dram_ptr, size_t si
 		}
 	} while (!entry);

+#if CONFIG_USERSPACE
+	LOG_DBG("userspace %u part %#zx bytes alloc %p entry %p DRAM %p",
+		k_current_get()->mem_domain_info.mem_domain->num_partitions, size,
+		alloc_ptr, entry->sram_ptr, dram_ptr);
+#endif
+
 	if (entry->sram_ptr) {
 		if (entry->size != size || entry->dram_ptr != dram_ptr) {
-			tr_err(fast_get, "size %u != %u or ptr %p != %p mismatch",
+			LOG_ERR("size %u != %u or ptr %p != %p mismatch",
 				entry->size, size, entry->dram_ptr, dram_ptr);
 			ret = NULL;
 			goto out;
 		}
+#if CONFIG_USERSPACE
+		/* We only get here for large buffers */
+		if (k_current_get()->mem_domain_info.mem_domain->num_partitions > 1) {
+			/* A userspace thread makes the request */
+			if (k_current_get() != entry->thread) {
+				int err = fast_get_access_grant(k_current_get(),
+								entry->sram_ptr, size);
+
+				if (err < 0) {
+					ret = NULL;
+					goto out;
+				}
+			} else {
+				LOG_WRN("Repeated access request by thread");
+			}
+		}
+#endif
+
 		ret = entry->sram_ptr;
 		entry->refcount++;
 		/*
@@ -117,23 +185,42 @@ const void *z_impl_fast_get(struct k_heap *heap, const void *dram_ptr, size_t si
 		goto out;
 	}

-	ret = sof_heap_alloc(heap, SOF_MEM_FLAG_USER, size, PLATFORM_DCACHE_ALIGN);
+	/*
+	 * If a userspace thread is the first user to fast-get the buffer, the
+	 * SRAM copy will be allocated on its own heap, so it will have access
+	 * to it
+	 */
+	ret = sof_heap_alloc(heap, alloc_flags, alloc_size, alloc_align);
 	if (!ret)
 		goto out;

 	entry->size = size;
 	entry->sram_ptr = ret;
 	memcpy_s(entry->sram_ptr, entry->size, dram_ptr, size);
 	dcache_writeback_region((__sparse_force void __sparse_cache *)entry->sram_ptr, size);
+
+#if CONFIG_USERSPACE
+	entry->thread = k_current_get();
+	if (size > FAST_GET_MAX_COPY_SIZE) {
+		/* Otherwise we've allocated on thread's heap, so it already has access */
+		int err = fast_get_access_grant(entry->thread, ret, size);
+
+		if (err < 0) {
+			sof_heap_free(NULL, ret);
+			ret = NULL;
+			goto out;
+		}
+	}
+#endif /* CONFIG_USERSPACE */
+
 	entry->dram_ptr = dram_ptr;
 	entry->refcount = 1;
 out:
 	k_spin_unlock(&data->lock, key);
-	tr_dbg(fast_get, "get %p, %p, size %u, refcnt %u", dram_ptr, ret, size,
-	       entry ? entry->refcount : 0);
+	LOG_DBG("get %p, %p, size %u, refcnt %u", dram_ptr, ret, size, entry ? entry->refcount : 0);

 	return ret;
 }
-EXPORT_SYMBOL(z_impl_fast_get);
+EXPORT_SYMBOL(fast_get);

 static struct sof_fast_get_entry *fast_put_find_entry(struct sof_fast_get_data *data,
 						      const void *sram_ptr)
@@ -148,7 +235,7 @@ static struct sof_fast_get_entry *fast_put_find_entry(struct sof_fast_get_data *
 	return NULL;
 }

-void z_impl_fast_put(struct k_heap *heap, const void *sram_ptr)
+void fast_put(struct k_heap *heap, const void *sram_ptr)
 {
 	struct sof_fast_get_data *data = &fast_get_data;
 	struct sof_fast_get_entry *entry;
@@ -157,42 +244,33 @@ void z_impl_fast_put(struct k_heap *heap, const void *sram_ptr)
 	key = k_spin_lock(&fast_get_data.lock);
 	entry = fast_put_find_entry(data, sram_ptr);
 	if (!entry) {
-		tr_err(fast_get, "Put called to unknown address %p", sram_ptr);
+		LOG_ERR("Put called to unknown address %p", sram_ptr);
 		goto out;
 	}
 	entry->refcount--;
+
+#if CONFIG_USERSPACE
+	if (entry->size > FAST_GET_MAX_COPY_SIZE &&
+	    k_current_get()->mem_domain_info.mem_domain->num_partitions > 1) {
+		struct k_mem_partition part = {
+			.start = (uintptr_t)entry->sram_ptr,
+			.size = ALIGN_UP(entry->size, CONFIG_MM_DRV_PAGE_SIZE),
+			.attr = K_MEM_PARTITION_P_RO_U_RO | XTENSA_MMU_CACHED_WB,
+		};
+
+		LOG_DBG("remove %#zx @ %p", part.size, entry->sram_ptr);
+		k_mem_domain_remove_partition(k_current_get()->mem_domain_info.mem_domain,
+					      &part);
+	}
+#endif
+
 	if (!entry->refcount) {
 		sof_heap_free(heap, entry->sram_ptr);
 		memset(entry, 0, sizeof(*entry));
 	}
out:
-	tr_dbg(fast_get, "put %p, DRAM %p size %u refcnt %u", sram_ptr, entry ? entry->dram_ptr : 0,
+	LOG_DBG("put %p, DRAM %p size %u refcnt %u", sram_ptr, entry ? entry->dram_ptr : 0,
 		entry ? entry->size : 0, entry ? entry->refcount : 0);
 	k_spin_unlock(&data->lock, key);
 }
-EXPORT_SYMBOL(z_impl_fast_put);
-
-#ifdef CONFIG_USERSPACE
-#include
-void z_vrfy_fast_put(struct k_heap *heap, const void *sram_ptr)
-{
-	K_OOPS(K_SYSCALL_MEMORY_WRITE(heap, sizeof(*heap)));
-	/*
-	 * FIXME: we don't know how much SRAM has been allocated, so cannot
-	 * check. Should fast_put() be changed to pass a size argument?
-	 */
-
-	z_impl_fast_put(heap, sram_ptr);
-}
-#include
-
-const void *z_vrfy_fast_get(struct k_heap *heap, const void *dram_ptr, size_t size)
-{
-	K_OOPS(K_SYSCALL_MEMORY_WRITE(heap, sizeof(*heap)));
-	/* We cannot (easily) verify the actual heapp memory */
-	K_OOPS(K_SYSCALL_MEMORY_READ(dram_ptr, size));
-
-	return z_impl_fast_get(heap, dram_ptr, size);
-}
-#include
-#endif
+EXPORT_SYMBOL(fast_put);
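A closing note on the SOF_DP_PART_HEAP_CACHE and SOF_DP_PART_CFG_CACHE partitions added in scheduler_dp_task_init() above: a userspace DP module may dereference either the cached or the uncached alias of the same physical region, and each alias needs its own entry in the thread's memory domain. The sketch below only illustrates that pairing with Zephyr's sys_cache_*_ptr_get() helpers; make_alias_partitions() is a hypothetical helper, not code from this patch, and the cacheability attribute to OR into the cached entry (XTENSA_MMU_CACHED_WB in the patch) is platform specific.

#include <zephyr/kernel.h>
#include <zephyr/cache.h>

/*
 * Illustration only: describe both aliases of one page-aligned buffer so that
 * either mapping can later be added to a k_mem_domain with
 * k_mem_domain_add_partition(). 'size' is assumed to be page aligned.
 */
static void make_alias_partitions(void *buf, size_t size, struct k_mem_partition part[2])
{
	/* uncached alias of the buffer */
	part[0] = (struct k_mem_partition){
		.start = (uintptr_t)sys_cache_uncached_ptr_get(buf),
		.size = size,
		.attr = K_MEM_PARTITION_P_RW_U_RW,
	};
	/* cached alias of the same physical memory; on Xtensa the patch above
	 * also ORs XTENSA_MMU_CACHED_WB into this attribute
	 */
	part[1] = (struct k_mem_partition){
		.start = (uintptr_t)sys_cache_cached_ptr_get(buf),
		.size = size,
		.attr = K_MEM_PARTITION_P_RW_U_RW,
	};
}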