Skip to content

Slow all-to-one copies #1232

@syamajala

Description

@syamajala

Is there some way to improve the performance of this example:

import numpy as np
import cupynumeric as cpy
from legate.core.task import task, InputStore, OutputStore, ReductionStore, ADD
from legate.core import(
    VariantCode,
    broadcast,
    align,
    dimension,
    constant,
    get_legate_runtime,
    LegateDataInterface,
    LogicalStore,
    get_machine,
    TaskTarget,
    Machine
    )

def get_store(obj: LegateDataInterface) -> LogicalStore:
    """Return the data of the single column exposed by ``obj``.

    ``obj`` must provide ``__legate_data_interface__`` with ``"version"``
    equal to 1 and a ``"data"`` mapping holding exactly one field -> column
    entry, where neither the field nor the column is nullable.

    Raises:
        AssertionError: if any of the expectations above does not hold.
    """
    interface = obj.__legate_data_interface__
    assert interface["version"] == 1

    columns_by_field = interface["data"]
    # Exactly one field is expected.
    assert len(columns_by_field) == 1

    field_iter = iter(columns_by_field)
    sole_field = next(field_iter)
    assert not sole_field.nullable

    sole_column = columns_by_field[sole_field]
    assert not sole_column.nullable
    return sole_column.data

@task(
    variants=(VariantCode.CPU,)
)
def fill(arr_store: ReductionStore[ADD]):
    """Add 1 to every element of this task's view of ``arr_store``.

    Args:
        arr_store: store passed to the task as an ``ADD`` reduction; it is
            viewed as a NumPy array and updated in place.
    """
    arr = np.asarray(arr_store)
    # A scalar broadcasts over the whole array, so there is no need to
    # allocate an array-sized temporary of ones just to add 1.
    arr += 1

@task(variants=(VariantCode.CPU,))
def print_arr(arr_store: InputStore):
    """Print the first entry along axis 0 of ``arr_store``.

    Args:
        arr_store: input store, viewed as a NumPy array before indexing.
    """
    first_entry = np.asarray(arr_store)[0]
    print(first_entry)

# Count the CPU processors of the current machine; this count is used
# below as the launch shape of the fill task.
machine = get_machine()
cpus = machine.only(TaskTarget.CPU).count()
print("CPUS:", cpus)

# 400 x 1024 x 1024 array of zeros (dtype is whatever cupynumeric defaults to).
arr = cpy.zeros((400, 1024, 1024))

# Manually launch `fill` with launch shape (cpus,), passing the store that
# backs `arr` as an ADD reduction.
runtime = get_legate_runtime()
library = fill.library
fill_task = runtime.create_manual_task(library, fill.task_id, (cpus,))
fill_task.add_reduction(get_store(arr), ADD)
fill_task.execute()

# Manually launch `print_arr` with launch shape (1,), reading the same
# store as an input.
# NOTE(review): presumably this single-point read following the multi-point
# reduction is what produces the slow all-to-one copies reported in this
# issue -- confirm against the linked profile.
library = print_arr.library
print_arr_task = runtime.create_manual_task(library, print_arr.task_id, (1,))
print_arr_task.add_input(get_store(arr))
print_arr_task.execute()

There is a profile here: https://legion.stanford.edu/prof-viewer/?url=https://sapling2.stanford.edu/~seshu/legion_prof_legate/

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No fields configured for Bug.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions