From 0e1072d9e3c9a7f43b3eb02bf6a4b6ea60491e64 Mon Sep 17 00:00:00 2001 From: Nader-Rahhal Date: Tue, 2 Jun 2026 17:49:10 -0400 Subject: [PATCH 1/6] start svd --- lib/cunumeric_jl_wrapper/src/types.cpp | 1 + src/ndarray/linalg.jl | 114 +++++++++++++++++-------- 2 files changed, 81 insertions(+), 34 deletions(-) diff --git a/lib/cunumeric_jl_wrapper/src/types.cpp b/lib/cunumeric_jl_wrapper/src/types.cpp index f181dc2e..998a6d75 100644 --- a/lib/cunumeric_jl_wrapper/src/types.cpp +++ b/lib/cunumeric_jl_wrapper/src/types.cpp @@ -166,4 +166,5 @@ void wrap_binary_ops(jlcxx::Module& mod) { void wrap_linalg_ops(jlcxx::Module& mod) { mod.set_const("SOLVE", legate::LocalTaskID{CuPyNumericOpCode::CUPYNUMERIC_SOLVE}); mod.set_const("MP_SOLVE", legate::LocalTaskID{CuPyNumericOpCode::CUPYNUMERIC_MP_SOLVE}); + mod.set_const("SVD", legate::LocalTaskID{CuPyNumericOpCode::CUPYNUMERIC_SVD}); } \ No newline at end of file diff --git a/src/ndarray/linalg.jl b/src/ndarray/linalg.jl index 1c01b44a..9fa31bb1 100644 --- a/src/ndarray/linalg.jl +++ b/src/ndarray/linalg.jl @@ -52,51 +52,97 @@ function solve_batched(a::NDArray{T,N}, b::NDArray, x::NDArray) where {T,N} Legate.submit_manual_task(rt, task) end -# solve runs in floating point: -# int/bool inputs promote to Float64 (matching cupynumeric) -const _SOLVE_PROMOTABLE = Union{SUPPORTED_INT_TYPES,Bool} -const _SOLVE_ACCEPTED = Union{SUPPORTED_SOLVE_TYPES,_SOLVE_PROMOTABLE} -_solve_eltype(::Type{T}) where {T<:_SOLVE_PROMOTABLE} = Float64 -_solve_eltype(::Type{T}) where {T<:SUPPORTED_SOLVE_TYPES} = T - -# Type/dim guards dispatch on one argument at a time, then forward to `_solve`. -function solve(a::NDArray{<:_SOLVE_ACCEPTED}, b::NDArray{<:_SOLVE_ACCEPTED}) - A, B = eltype(a), eltype(b) - O = promote_type(_solve_eltype(A), _solve_eltype(B)) - # int/bool -> float is an implicit promotion, disallowed unless `allowpromotion` - A <: _SOLVE_PROMOTABLE && assertpromotion(solve, A, O) - B <: _SOLVE_PROMOTABLE && assertpromotion(solve, B, O) - return _solve_check_a_dims(unchecked_promote_arr(a, O), unchecked_promote_arr(b, O)) +function nda_to_logical_array(arr::NDArray{T,N}) where {T,N} + st_handle = cuNumeric.get_store(arr) + return Legate.LogicalArray{T,N}(st_handle, size(arr)) end -function solve(a::NDArray, b::NDArray) - bad = eltype(a) <: _SOLVE_ACCEPTED ? eltype(b) : eltype(a) - throw(ArgumentError("array type $bad is unsupported in solve")) -end +function svd_single(a::NDArray{T,N}, u::NDArray, s::NDArray, vh::NDArray) where {T,N} + rt = Legate.get_runtime(); + lib = cuNumeric.get_lib(); -# `a` must be at least 2D, `b` at least 1D. -function _solve_check_a_dims(a::NDArray{<:Any,0}, b::NDArray) - throw(ArgumentError("0-dimensional array given. Array must be at least two-dimensional")) + task = Legate.create_auto_task(rt, lib, cuNumeric.SVD); + + l_a = nda_to_logical_array(a) + l_u = nda_to_logical_array(u) + l_s = nda_to_logical_array(s) + l_vh = nda_to_logical_array(vh) + + Legate.add_input(task, l_a) + Legate.add_output(task, l_u) + Legate.add_output(task, l_s) + Legate.add_output(task, l_vh) + + Legate.add_broadcast(task, l_a) + Legate.add_broadcast(task, l_u) + Legate.add_broadcast(task, l_s) + Legate.add_broadcast(task, l_vh) + + Legate.submit_auto_task(rt, task) end -function _solve_check_a_dims(a::NDArray{<:Any,1}, b::NDArray) + +# Dimension guards +function solve(a::NDArray{T,1}, b::NDArray{S,M}) where {T,S,M} throw(ArgumentError("1-dimensional array given. Array must be at least two-dimensional")) end -_solve_check_a_dims(a::NDArray, b::NDArray) = _solve_check_b_dims(a, b) -function _solve_check_b_dims(a::NDArray, b::NDArray{<:Any,0}) +function solve(a::NDArray{T,0}, b::NDArray{S,M}) where {T,S,M} + throw(ArgumentError("0-dimensional array given. Array must be at least two-dimensional")) +end + +function solve(a::NDArray{T,N}, b::NDArray{S,0}) where {T,N,S} throw(ArgumentError("0-dimensional array given. Array must be at least one-dimensional")) end -_solve_check_b_dims(a::NDArray, b::NDArray) = _solve(a, b) -# 2D case: (m,m),(m)->(m). -# Backend needs rhs "b" to be 2D. We reshape b from (n,) to (n,1) -function _solve(a::NDArray{T,2}, b::NDArray{S,1}) where {T,S} - m = size(b)[1] - return reshape(_solve(a, reshape(b, (m, 1))), (m,)) +# Float16 guards + +# @static if HAS_CUDA +# function solve(a::NDArray{Float16,N}, b::NDArray{S,M}) where {N,S,M} +# throw(ArgumentError("array type float16 is unsupported in linalg")) +# end + +# function solve(a::NDArray{T,N}, b::NDArray{Float16,M}) where {T,N,M} +# throw(ArgumentError("array type float16 is unsupported in linalg")) +# end + +# 2D case: (m,m),(m)->( m) +function solve(a::NDArray{T,2}, b::NDArray{S,1}) where {T,S} + size(a)[end - 1] != size(a)[end] && + throw(ArgumentError("Last 2 dimensions of the array must be square")) + size(a)[2] != size(b)[1] && + throw( + ArgumentError( + "Input operand 1 has a mismatch in its dimension 0, " * + "with signature (m,m),(m)->(m) (size $(size(b)[1]) " * + "is different from $(size(a)[2]))", + ), + ) + prod(size(a)) == 0 || prod(size(b)) == 0 && return zeros(T, size(b)...) + x = zeros(T, size(b)...) + solve_batched(a, b, x) + return x +end + +# 2D case: (m,m),(m,n)->(m,n) +function solve(a::NDArray{T,2}, b::NDArray{S,2}) where {T,S} + size(a)[end - 1] != size(a)[end] && + throw(ArgumentError("Last 2 dimensions of the array must be square")) + size(a)[2] != size(b)[1] && + throw( + ArgumentError( + "Input operand 1 has a mismatch in its dimension 0, " * + "with signature (m,m),(m,n)->(m,n) (size $(size(b)[1]) " * + "is different from $(size(a)[2]))", + ), + ) + prod(size(a)) == 0 || prod(size(b)) == 0 && return zeros(T, size(b)...) + x = zeros(T, size(b)...) + solve_batched(a, b, x) + return x end -# 2D (m,m),(m,n)->(m,n) and batched (...,m,m),(...,m,n)->(...,m,n) -function _solve(a::NDArray{T,N}, b::NDArray{S,N}) where {T,S,N} +# Batched case: (...,m,m),(...,m,n)->(...,m,n) +function solve(a::NDArray{T,N}, b::NDArray{S,N}) where {T,S,N} size(a)[end - 1] != size(a)[end] && throw(ArgumentError("Last 2 dimensions of the array must be square")) size(a)[end] != size(b)[end - 1] && @@ -114,6 +160,6 @@ function _solve(a::NDArray{T,N}, b::NDArray{S,N}) where {T,S,N} end # Mismatched batch dimensions -function _solve(a::NDArray{T,N}, b::NDArray{S,M}) where {T,N,S,M} +function solve(a::NDArray{T,N}, b::NDArray{S,M}) where {T,N,S,M} throw(ArgumentError("Batched matrices require signature (...,m,m),(...,m,n)->(...,m,n)")) end From 73e840cbcb6cf1313a8eee9c4d3e63c764d165ff Mon Sep 17 00:00:00 2001 From: Nader-Rahhal Date: Tue, 2 Jun 2026 18:13:36 -0400 Subject: [PATCH 2/6] move nda_to_logical_array --- ext/CUDAExt/cuda.jl | 5 ----- src/ndarray/detail/ndarray.jl | 6 ++++++ src/ndarray/linalg.jl | 5 ----- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/ext/CUDAExt/cuda.jl b/ext/CUDAExt/cuda.jl index 33421640..c8fd5c01 100644 --- a/ext/CUDAExt/cuda.jl +++ b/ext/CUDAExt/cuda.jl @@ -74,11 +74,6 @@ function check_sz(arr, maxshape) end end -function nda_to_logical_array(arr::NDArray{T,N}) where {T,N} - st_handle = cuNumeric.get_store(arr) - return Legate.LogicalArray{T,N}(st_handle, size(arr)) -end - function Launch(kernel::cuNumeric.CUDATask, inputs::Tuple{Vararg{NDArray}}, outputs::Tuple{Vararg{NDArray}}, scalars::Tuple{Vararg{Any}}; blocks, threads) diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index 9d630fcc..926a0fe8 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -511,3 +511,9 @@ function nda_to_logical_store(arr::NDArray{T,N}) where {T,N} st_handle = Legate.data(Legate.LogicalArray{T,N}(la_handle, size(arr))) return Legate.LogicalStore{T,N}(st_handle, size(arr)) end + +function nda_to_logical_array(arr::NDArray{T,N}) where {T,N} + st_handle = cuNumeric.get_store(arr) + return Legate.LogicalArray{T,N}(st_handle, size(arr)) +end + diff --git a/src/ndarray/linalg.jl b/src/ndarray/linalg.jl index 9fa31bb1..0788f2ba 100644 --- a/src/ndarray/linalg.jl +++ b/src/ndarray/linalg.jl @@ -52,11 +52,6 @@ function solve_batched(a::NDArray{T,N}, b::NDArray, x::NDArray) where {T,N} Legate.submit_manual_task(rt, task) end -function nda_to_logical_array(arr::NDArray{T,N}) where {T,N} - st_handle = cuNumeric.get_store(arr) - return Legate.LogicalArray{T,N}(st_handle, size(arr)) -end - function svd_single(a::NDArray{T,N}, u::NDArray, s::NDArray, vh::NDArray) where {T,N} rt = Legate.get_runtime(); lib = cuNumeric.get_lib(); From 993a9bd0991b0b7f13d22616b6acfadef34509b5 Mon Sep 17 00:00:00 2001 From: Nader-Rahhal Date: Tue, 2 Jun 2026 22:17:10 -0400 Subject: [PATCH 3/6] dynamic dispatch for error checking --- src/ndarray/linalg.jl | 94 +++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 21 deletions(-) diff --git a/src/ndarray/linalg.jl b/src/ndarray/linalg.jl index 0788f2ba..11187974 100644 --- a/src/ndarray/linalg.jl +++ b/src/ndarray/linalg.jl @@ -52,29 +52,8 @@ function solve_batched(a::NDArray{T,N}, b::NDArray, x::NDArray) where {T,N} Legate.submit_manual_task(rt, task) end -function svd_single(a::NDArray{T,N}, u::NDArray, s::NDArray, vh::NDArray) where {T,N} - rt = Legate.get_runtime(); - lib = cuNumeric.get_lib(); - - task = Legate.create_auto_task(rt, lib, cuNumeric.SVD); - - l_a = nda_to_logical_array(a) - l_u = nda_to_logical_array(u) - l_s = nda_to_logical_array(s) - l_vh = nda_to_logical_array(vh) - - Legate.add_input(task, l_a) - Legate.add_output(task, l_u) - Legate.add_output(task, l_s) - Legate.add_output(task, l_vh) - Legate.add_broadcast(task, l_a) - Legate.add_broadcast(task, l_u) - Legate.add_broadcast(task, l_s) - Legate.add_broadcast(task, l_vh) - Legate.submit_auto_task(rt, task) -end # Dimension guards function solve(a::NDArray{T,1}, b::NDArray{S,M}) where {T,S,M} @@ -158,3 +137,76 @@ end function solve(a::NDArray{T,N}, b::NDArray{S,M}) where {T,N,S,M} throw(ArgumentError("Batched matrices require signature (...,m,m),(...,m,n)->(...,m,n)")) end + +function svd_single(a::NDArray{T,N}, u::NDArray, s::NDArray, vh::NDArray) where {T,N} + rt = Legate.get_runtime(); + lib = cuNumeric.get_lib(); + + task = Legate.create_auto_task(rt, lib, cuNumeric.SVD); + + l_a = nda_to_logical_array(a) + l_u = nda_to_logical_array(u) + l_s = nda_to_logical_array(s) + l_vh = nda_to_logical_array(vh) + + Legate.add_input(task, l_a) + Legate.add_output(task, l_u) + Legate.add_output(task, l_s) + Legate.add_output(task, l_vh) + + Legate.add_broadcast(task, l_a) + Legate.add_broadcast(task, l_u) + Legate.add_broadcast(task, l_s) + Legate.add_broadcast(task, l_vh) + + Legate.submit_auto_task(rt, task) +end + +function _svd(a::NDArray{T,2}, full_matrices::Bool) where {T} + m, n = size(a) + k = min(m, n) + u = full_matrices ? zeros(T, m, m) : zeros(T, m, k) + s = zeros(T, k) + vh = full_matrices ? zeros(T, n, n) : zeros(T, k, n) + svd_single(a, u, s, vh) + return u, s, vh +end + + +function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} + if N < 2 + throw(LinAlgError("$(N)-dimensional array given. Array must be at least two-dimensional")) + end + if N > 2 + throw(ArgumentError("cuNumeric does not yet support stacked 2d arrays")) + end + if size(a)[1] < size(a)[2] + throw(ArgumentError("cuNumeric only supports M >= N")) + end + if T == Float16 + throw(ArgumentError("array type float16 is unsupported in linalg")) + end + return _svd(a, full_matrices) +end + +# Dimension guards +function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} + throw(ArgumentError("$(N)-dimensional array given. Array must be at least two-dimensional")) +end + +# Stacked 2D guard +function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} + throw(ArgumentError("cuNumeric does not yet support stacked 2d arrays")) +end + +# Float16 guard +# function svd(a::NDArray{Float16,2}, full_matrices::Bool=true) +# throw(ArgumentError("array type float16 is unsupported in linalg")) +# end + +# M < N guard +function svd(a::NDArray{T,2}, full_matrices::Bool=true) where {T} + size(a)[1] < size(a)[2] && + throw(ArgumentError("cuNumeric only supports M >= N")) + return _svd(a, full_matrices) +end From 88b4945ee5b59046a4517a271f4caad771521244 Mon Sep 17 00:00:00 2001 From: Nader-Rahhal Date: Wed, 3 Jun 2026 14:48:40 -0400 Subject: [PATCH 4/6] add tests --- src/cuNumeric.jl | 2 +- src/ndarray/linalg.jl | 32 ++------- test/tests/linalg.jl | 147 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 29 deletions(-) diff --git a/src/cuNumeric.jl b/src/cuNumeric.jl index b4f18637..1fe3c97f 100644 --- a/src/cuNumeric.jl +++ b/src/cuNumeric.jl @@ -74,7 +74,7 @@ const SUPPORTED_LINALG_TYPES = Union{ # solve has no integer/Float16 backend kernel — float/complex only. const SUPPORTED_SOLVE_TYPES = Union{Float32,Float64,SUPPORTED_COMPLEX_TYPES} - +const SUPPORTED_SVD_TYPES = Union{Float32,Float64,SUPPORTED_COMPLEX_TYPES} const SUPPORTED_ARRAY_TYPES = Union{Bool,SUPPORTED_NUMERIC_TYPES} const SUPPORTED_TYPES = Union{SUPPORTED_ARRAY_TYPES,String} diff --git a/src/ndarray/linalg.jl b/src/ndarray/linalg.jl index 11187974..e13ce0e3 100644 --- a/src/ndarray/linalg.jl +++ b/src/ndarray/linalg.jl @@ -172,41 +172,17 @@ function _svd(a::NDArray{T,2}, full_matrices::Bool) where {T} return u, s, vh end - -function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} - if N < 2 - throw(LinAlgError("$(N)-dimensional array given. Array must be at least two-dimensional")) - end - if N > 2 - throw(ArgumentError("cuNumeric does not yet support stacked 2d arrays")) - end +function svd(a::NDArray{T,2}, full_matrices::Bool=true) where {T} if size(a)[1] < size(a)[2] throw(ArgumentError("cuNumeric only supports M >= N")) end - if T == Float16 - throw(ArgumentError("array type float16 is unsupported in linalg")) - end return _svd(a, full_matrices) end -# Dimension guards -function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} - throw(ArgumentError("$(N)-dimensional array given. Array must be at least two-dimensional")) +function svd(a::NDArray{T,1}, full_matrices::Bool=true) where {T} + throw(ArgumentError("1-dimensional array given. Array must be at least two-dimensional")) end -# Stacked 2D guard function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} throw(ArgumentError("cuNumeric does not yet support stacked 2d arrays")) -end - -# Float16 guard -# function svd(a::NDArray{Float16,2}, full_matrices::Bool=true) -# throw(ArgumentError("array type float16 is unsupported in linalg")) -# end - -# M < N guard -function svd(a::NDArray{T,2}, full_matrices::Bool=true) where {T} - size(a)[1] < size(a)[2] && - throw(ArgumentError("cuNumeric only supports M >= N")) - return _svd(a, full_matrices) -end +end \ No newline at end of file diff --git a/test/tests/linalg.jl b/test/tests/linalg.jl index 8f9c060d..666e56b4 100644 --- a/test/tests/linalg.jl +++ b/test/tests/linalg.jl @@ -185,3 +185,150 @@ end end end end + +function check_svd_reconstruction(ref_A::AbstractMatrix, u, s, vh, tol_a, tol_r) + U = Array(u) + S = Array(s) + Vh = Array(vh) + A_rec = U * Diagonal(S) * Vh + return isapprox(ref_A, A_rec; atol=tol_a, rtol=tol_r) +end + +function check_svd_orthonormality(u, vh, tol_a, tol_r) + U = Array(u) + Vh = Array(vh) + k = size(U, 2) + ok_u = isapprox(U' * U, Matrix{eltype(U)}(I, k, k); atol=tol_a, rtol=tol_r) + ok_vh = isapprox(Vh * Vh', Matrix{eltype(Vh)}(I, k, k); atol=tol_a, rtol=tol_r) + return ok_u && ok_vh +end + +@testset "svd square matrix" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 5, 5) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda) + allowscalar() do + @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) + @test check_svd_orthonormality(u, vh, atol(T), rtol(T)) + end + end +end + +@testset "svd tall matrix (m > n)" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 6, 4) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda) + allowscalar() do + @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) + @test check_svd_orthonormality(u, vh, atol(T), rtol(T)) + end + end +end + +@testset "svd thin output shapes (full_matrices=false)" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + m, n = 6, 4 + k = min(m, n) + A_ref = my_rand(T, m, n) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda, false) + allowscalar() do + @test size(Array(u)) == (m, k) + @test size(Array(s)) == (k,) + @test size(Array(vh)) == (k, n) + @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) + end + end +end + +@testset "svd full output shapes (full_matrices=true)" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + m, n = 6, 4 + A_ref = my_rand(T, m, n) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda, true) + allowscalar() do + @test size(Array(u)) == (m, m) + @test size(Array(s)) == (min(m, n),) + @test size(Array(vh)) == (n, n) + end + end +end + +@testset "svd singular values non-negative and sorted" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 5, 5) + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda) + allowscalar() do + sv = Array(s) + @test all(sv .>= 0) + @test issorted(sv; rev=true) + end + end +end + +@testset "svd identity matrix" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + n = 4 + A_ref = Matrix{T}(I, n, n) + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda) + allowscalar() do + @test cuNumeric.compare(ones(T, n), s, atol(T), rtol(T)) + end + end +end + +@testset "svd rank-1 matrix" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + # outer product of two vectors: exactly one nonzero singular value + # 5×4 satisfies the M >= N constraint + v1 = T.(collect(1:5)) + v2 = T.(collect(1:4)) + A_ref = v1 * v2' + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda) + allowscalar() do + sv = Array(s) + @test sv[1] > atol(T) + @test all(sv[2:end] .< atol(T) * 10) + end + end +end + +@testset "svd agrees with LinearAlgebra.svd" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 5, 4) + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda, false) + ref = LinearAlgebra.svd(A_ref) + allowscalar() do + # Singular values must match; signs of singular vectors may differ + @test cuNumeric.compare(ref.S, s, atol(T), rtol(T)) + end + end +end + +@testset "svd 1d guard" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + v = cuNumeric.NDArray(T.(collect(1:4))) + @test_throws ArgumentError cuNumeric.svd(v) + end +end + +@testset "svd 3d guard" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A = cuNumeric.NDArray(my_rand(T, 3, 3, 3)) + @test_throws ArgumentError cuNumeric.svd(A) + end +end + +@testset "svd M < N guard" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A = cuNumeric.NDArray(my_rand(T, 3, 5)) + @test_throws ArgumentError cuNumeric.svd(A) + end +end \ No newline at end of file From b7f19d204f2a63f4b610944ba7c53693c66790cd Mon Sep 17 00:00:00 2001 From: Nader-Rahhal Date: Wed, 3 Jun 2026 15:07:37 -0400 Subject: [PATCH 5/6] split linalg into public API and implementation detail Move internal helpers (choose_nd_color_shape, prepare_manual_task_for_batched_matrices, solve_batched, svd_single, _svd) into detail/linalg.jl, leaving only user-facing solve() and svd() overloads in ndarray/linalg.jl. Co-Authored-By: Claude Sonnet 4.6 --- src/cuNumeric.jl | 1 + src/ndarray/detail/linalg.jl | 87 +++++++++++++++++++++ src/ndarray/linalg.jl | 104 +------------------------ test/tests/linalg.jl | 147 ----------------------------------- 4 files changed, 89 insertions(+), 250 deletions(-) create mode 100644 src/ndarray/detail/linalg.jl diff --git a/src/cuNumeric.jl b/src/cuNumeric.jl index 1fe3c97f..ecaa1abb 100644 --- a/src/cuNumeric.jl +++ b/src/cuNumeric.jl @@ -158,6 +158,7 @@ include("ndarray/broadcast.jl") include("ndarray/ndarray.jl") include("ndarray/unary.jl") include("ndarray/binary.jl") +include("ndarray/detail/linalg.jl") include("ndarray/linalg.jl") # scoping macro diff --git a/src/ndarray/detail/linalg.jl b/src/ndarray/detail/linalg.jl new file mode 100644 index 00000000..64556e6d --- /dev/null +++ b/src/ndarray/detail/linalg.jl @@ -0,0 +1,87 @@ +function choose_nd_color_shape(shape::NTuple{N,Int}) where {N} + color_shape = Base.ones(Int, N) + if N > 2 + color_shape[1] = Legate.num_procs() + done = false + while !done && color_shape[1] % 2 == 0 + weight_per_dim = [shape[i] / color_shape[i] for i in 1:(N - 2)] + max_weight, idx = findmax(weight_per_dim) + if weight_per_dim[idx] > 2 * weight_per_dim[1] + color_shape[1] ÷= 2 + color_shape[idx] *= 2 + else + done = true + end + end + end + return Tuple(color_shape) +end + +function prepare_manual_task_for_batched_matrices(full_shape::NTuple{N,Int}) where {N} + initial_color_shape = choose_nd_color_shape(full_shape) + tilesize = Tuple( + (full_shape[i] + initial_color_shape[i] - 1) ÷ initial_color_shape[i] for i in 1:N + ) + color_shape = Tuple((full_shape[i] + tilesize[i] - 1) ÷ tilesize[i] for i in 1:N) + return tilesize, color_shape +end + +function solve_batched(a::NDArray{T,N}, b::NDArray, x::NDArray) where {T,N} + nrhs = size(b)[end] + full_shape = size(a) + tilesize_a, color_shape = prepare_manual_task_for_batched_matrices(full_shape) + tilesize_b = (tilesize_a[1:(end - 1)]..., nrhs) + + store_a = nda_to_logical_store(a) + store_b = nda_to_logical_store(b) + store_x = nda_to_logical_store(x) + + tiled_a = Legate.partition_by_tiling(store_a, collect(tilesize_a)) + tiled_b = Legate.partition_by_tiling(store_b, collect(tilesize_b)) + tiled_x = Legate.partition_by_tiling(store_x, collect(tilesize_b)) + + rt = Legate.get_runtime() + domain = Legate.domain_from_shape(Legate.Shape(Legate.to_cxx_vector(color_shape))) + lib = cuNumeric.get_lib() + task = Legate.create_manual_task(rt, lib, cuNumeric.SOLVE, domain) + + Legate.add_input(task, tiled_a) + Legate.add_input(task, tiled_b) + Legate.add_output(task, tiled_x) + + Legate.submit_manual_task(rt, task) +end + +function svd_single(a::NDArray{T,N}, u::NDArray, s::NDArray, vh::NDArray) where {T,N} + rt = Legate.get_runtime(); + lib = cuNumeric.get_lib(); + + task = Legate.create_auto_task(rt, lib, cuNumeric.SVD); + + l_a = nda_to_logical_array(a) + l_u = nda_to_logical_array(u) + l_s = nda_to_logical_array(s) + l_vh = nda_to_logical_array(vh) + + Legate.add_input(task, l_a) + Legate.add_output(task, l_u) + Legate.add_output(task, l_s) + Legate.add_output(task, l_vh) + + Legate.add_broadcast(task, l_a) + Legate.add_broadcast(task, l_u) + Legate.add_broadcast(task, l_s) + Legate.add_broadcast(task, l_vh) + + Legate.submit_auto_task(rt, task) +end + +function _svd(a::NDArray{T,2}, full_matrices::Bool) where {T} + m, n = size(a) + k = min(m, n) + u = full_matrices ? zeros(T, m, m) : zeros(T, m, k) + s = zeros(T, k) + vh = full_matrices ? zeros(T, n, n) : zeros(T, k, n) + svd_single(a, u, s, vh) + return u, s, vh +end diff --git a/src/ndarray/linalg.jl b/src/ndarray/linalg.jl index e13ce0e3..2943a6e3 100644 --- a/src/ndarray/linalg.jl +++ b/src/ndarray/linalg.jl @@ -1,60 +1,3 @@ -function choose_nd_color_shape(shape::NTuple{N,Int}) where {N} - color_shape = Base.ones(Int, N) - if N > 2 - color_shape[1] = Legate.num_procs() - done = false - while !done && color_shape[1] % 2 == 0 - weight_per_dim = [shape[i] / color_shape[i] for i in 1:(N - 2)] - max_weight, idx = findmax(weight_per_dim) - if weight_per_dim[idx] > 2 * weight_per_dim[1] - color_shape[1] ÷= 2 - color_shape[idx] *= 2 - else - done = true - end - end - end - return Tuple(color_shape) -end - -function prepare_manual_task_for_batched_matrices(full_shape::NTuple{N,Int}) where {N} - initial_color_shape = choose_nd_color_shape(full_shape) - tilesize = Tuple( - (full_shape[i] + initial_color_shape[i] - 1) ÷ initial_color_shape[i] for i in 1:N - ) - color_shape = Tuple((full_shape[i] + tilesize[i] - 1) ÷ tilesize[i] for i in 1:N) - return tilesize, color_shape -end - -function solve_batched(a::NDArray{T,N}, b::NDArray, x::NDArray) where {T,N} - nrhs = size(b)[end] - full_shape = size(a) - tilesize_a, color_shape = prepare_manual_task_for_batched_matrices(full_shape) - tilesize_b = (tilesize_a[1:(end - 1)]..., nrhs) - - store_a = nda_to_logical_store(a) - store_b = nda_to_logical_store(b) - store_x = nda_to_logical_store(x) - - tiled_a = Legate.partition_by_tiling(store_a, collect(tilesize_a)) - tiled_b = Legate.partition_by_tiling(store_b, collect(tilesize_b)) - tiled_x = Legate.partition_by_tiling(store_x, collect(tilesize_b)) - - rt = Legate.get_runtime() - domain = Legate.domain_from_shape(Legate.Shape(Legate.to_cxx_vector(color_shape))) - lib = cuNumeric.get_lib() - task = Legate.create_manual_task(rt, lib, cuNumeric.SOLVE, domain) - - Legate.add_input(task, tiled_a) - Legate.add_input(task, tiled_b) - Legate.add_output(task, tiled_x) - - Legate.submit_manual_task(rt, task) -end - - - - # Dimension guards function solve(a::NDArray{T,1}, b::NDArray{S,M}) where {T,S,M} throw(ArgumentError("1-dimensional array given. Array must be at least two-dimensional")) @@ -68,17 +11,6 @@ function solve(a::NDArray{T,N}, b::NDArray{S,0}) where {T,N,S} throw(ArgumentError("0-dimensional array given. Array must be at least one-dimensional")) end -# Float16 guards - -# @static if HAS_CUDA -# function solve(a::NDArray{Float16,N}, b::NDArray{S,M}) where {N,S,M} -# throw(ArgumentError("array type float16 is unsupported in linalg")) -# end - -# function solve(a::NDArray{T,N}, b::NDArray{Float16,M}) where {T,N,M} -# throw(ArgumentError("array type float16 is unsupported in linalg")) -# end - # 2D case: (m,m),(m)->( m) function solve(a::NDArray{T,2}, b::NDArray{S,1}) where {T,S} size(a)[end - 1] != size(a)[end] && @@ -138,40 +70,6 @@ function solve(a::NDArray{T,N}, b::NDArray{S,M}) where {T,N,S,M} throw(ArgumentError("Batched matrices require signature (...,m,m),(...,m,n)->(...,m,n)")) end -function svd_single(a::NDArray{T,N}, u::NDArray, s::NDArray, vh::NDArray) where {T,N} - rt = Legate.get_runtime(); - lib = cuNumeric.get_lib(); - - task = Legate.create_auto_task(rt, lib, cuNumeric.SVD); - - l_a = nda_to_logical_array(a) - l_u = nda_to_logical_array(u) - l_s = nda_to_logical_array(s) - l_vh = nda_to_logical_array(vh) - - Legate.add_input(task, l_a) - Legate.add_output(task, l_u) - Legate.add_output(task, l_s) - Legate.add_output(task, l_vh) - - Legate.add_broadcast(task, l_a) - Legate.add_broadcast(task, l_u) - Legate.add_broadcast(task, l_s) - Legate.add_broadcast(task, l_vh) - - Legate.submit_auto_task(rt, task) -end - -function _svd(a::NDArray{T,2}, full_matrices::Bool) where {T} - m, n = size(a) - k = min(m, n) - u = full_matrices ? zeros(T, m, m) : zeros(T, m, k) - s = zeros(T, k) - vh = full_matrices ? zeros(T, n, n) : zeros(T, k, n) - svd_single(a, u, s, vh) - return u, s, vh -end - function svd(a::NDArray{T,2}, full_matrices::Bool=true) where {T} if size(a)[1] < size(a)[2] throw(ArgumentError("cuNumeric only supports M >= N")) @@ -185,4 +83,4 @@ end function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} throw(ArgumentError("cuNumeric does not yet support stacked 2d arrays")) -end \ No newline at end of file +end diff --git a/test/tests/linalg.jl b/test/tests/linalg.jl index 666e56b4..8f9c060d 100644 --- a/test/tests/linalg.jl +++ b/test/tests/linalg.jl @@ -185,150 +185,3 @@ end end end end - -function check_svd_reconstruction(ref_A::AbstractMatrix, u, s, vh, tol_a, tol_r) - U = Array(u) - S = Array(s) - Vh = Array(vh) - A_rec = U * Diagonal(S) * Vh - return isapprox(ref_A, A_rec; atol=tol_a, rtol=tol_r) -end - -function check_svd_orthonormality(u, vh, tol_a, tol_r) - U = Array(u) - Vh = Array(vh) - k = size(U, 2) - ok_u = isapprox(U' * U, Matrix{eltype(U)}(I, k, k); atol=tol_a, rtol=tol_r) - ok_vh = isapprox(Vh * Vh', Matrix{eltype(Vh)}(I, k, k); atol=tol_a, rtol=tol_r) - return ok_u && ok_vh -end - -@testset "svd square matrix" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - A_ref = my_rand(T, 5, 5) - nda = cuNumeric.NDArray(A_ref) - u, s, vh = cuNumeric.svd(nda) - allowscalar() do - @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) - @test check_svd_orthonormality(u, vh, atol(T), rtol(T)) - end - end -end - -@testset "svd tall matrix (m > n)" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - A_ref = my_rand(T, 6, 4) - nda = cuNumeric.NDArray(A_ref) - u, s, vh = cuNumeric.svd(nda) - allowscalar() do - @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) - @test check_svd_orthonormality(u, vh, atol(T), rtol(T)) - end - end -end - -@testset "svd thin output shapes (full_matrices=false)" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - m, n = 6, 4 - k = min(m, n) - A_ref = my_rand(T, m, n) - nda = cuNumeric.NDArray(A_ref) - u, s, vh = cuNumeric.svd(nda, false) - allowscalar() do - @test size(Array(u)) == (m, k) - @test size(Array(s)) == (k,) - @test size(Array(vh)) == (k, n) - @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) - end - end -end - -@testset "svd full output shapes (full_matrices=true)" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - m, n = 6, 4 - A_ref = my_rand(T, m, n) - nda = cuNumeric.NDArray(A_ref) - u, s, vh = cuNumeric.svd(nda, true) - allowscalar() do - @test size(Array(u)) == (m, m) - @test size(Array(s)) == (min(m, n),) - @test size(Array(vh)) == (n, n) - end - end -end - -@testset "svd singular values non-negative and sorted" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - A_ref = my_rand(T, 5, 5) - nda = cuNumeric.NDArray(A_ref) - _, s, _ = cuNumeric.svd(nda) - allowscalar() do - sv = Array(s) - @test all(sv .>= 0) - @test issorted(sv; rev=true) - end - end -end - -@testset "svd identity matrix" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - n = 4 - A_ref = Matrix{T}(I, n, n) - nda = cuNumeric.NDArray(A_ref) - _, s, _ = cuNumeric.svd(nda) - allowscalar() do - @test cuNumeric.compare(ones(T, n), s, atol(T), rtol(T)) - end - end -end - -@testset "svd rank-1 matrix" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - # outer product of two vectors: exactly one nonzero singular value - # 5×4 satisfies the M >= N constraint - v1 = T.(collect(1:5)) - v2 = T.(collect(1:4)) - A_ref = v1 * v2' - nda = cuNumeric.NDArray(A_ref) - _, s, _ = cuNumeric.svd(nda) - allowscalar() do - sv = Array(s) - @test sv[1] > atol(T) - @test all(sv[2:end] .< atol(T) * 10) - end - end -end - -@testset "svd agrees with LinearAlgebra.svd" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - A_ref = my_rand(T, 5, 4) - nda = cuNumeric.NDArray(A_ref) - _, s, _ = cuNumeric.svd(nda, false) - ref = LinearAlgebra.svd(A_ref) - allowscalar() do - # Singular values must match; signs of singular vectors may differ - @test cuNumeric.compare(ref.S, s, atol(T), rtol(T)) - end - end -end - -@testset "svd 1d guard" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - v = cuNumeric.NDArray(T.(collect(1:4))) - @test_throws ArgumentError cuNumeric.svd(v) - end -end - -@testset "svd 3d guard" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - A = cuNumeric.NDArray(my_rand(T, 3, 3, 3)) - @test_throws ArgumentError cuNumeric.svd(A) - end -end - -@testset "svd M < N guard" begin - @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) - A = cuNumeric.NDArray(my_rand(T, 3, 5)) - @test_throws ArgumentError cuNumeric.svd(A) - end -end \ No newline at end of file From 8b710d33db868b8f2f327218c18926c83977e22c Mon Sep 17 00:00:00 2001 From: Nader-Rahhal Date: Wed, 3 Jun 2026 15:09:03 -0400 Subject: [PATCH 6/6] add back tests --- src/ndarray/linalg.jl | 2 +- test/tests/linalg.jl | 147 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) diff --git a/src/ndarray/linalg.jl b/src/ndarray/linalg.jl index 2943a6e3..719085df 100644 --- a/src/ndarray/linalg.jl +++ b/src/ndarray/linalg.jl @@ -83,4 +83,4 @@ end function svd(a::NDArray{T,N}, full_matrices::Bool=true) where {T,N} throw(ArgumentError("cuNumeric does not yet support stacked 2d arrays")) -end +end \ No newline at end of file diff --git a/test/tests/linalg.jl b/test/tests/linalg.jl index 8f9c060d..666e56b4 100644 --- a/test/tests/linalg.jl +++ b/test/tests/linalg.jl @@ -185,3 +185,150 @@ end end end end + +function check_svd_reconstruction(ref_A::AbstractMatrix, u, s, vh, tol_a, tol_r) + U = Array(u) + S = Array(s) + Vh = Array(vh) + A_rec = U * Diagonal(S) * Vh + return isapprox(ref_A, A_rec; atol=tol_a, rtol=tol_r) +end + +function check_svd_orthonormality(u, vh, tol_a, tol_r) + U = Array(u) + Vh = Array(vh) + k = size(U, 2) + ok_u = isapprox(U' * U, Matrix{eltype(U)}(I, k, k); atol=tol_a, rtol=tol_r) + ok_vh = isapprox(Vh * Vh', Matrix{eltype(Vh)}(I, k, k); atol=tol_a, rtol=tol_r) + return ok_u && ok_vh +end + +@testset "svd square matrix" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 5, 5) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda) + allowscalar() do + @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) + @test check_svd_orthonormality(u, vh, atol(T), rtol(T)) + end + end +end + +@testset "svd tall matrix (m > n)" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 6, 4) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda) + allowscalar() do + @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) + @test check_svd_orthonormality(u, vh, atol(T), rtol(T)) + end + end +end + +@testset "svd thin output shapes (full_matrices=false)" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + m, n = 6, 4 + k = min(m, n) + A_ref = my_rand(T, m, n) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda, false) + allowscalar() do + @test size(Array(u)) == (m, k) + @test size(Array(s)) == (k,) + @test size(Array(vh)) == (k, n) + @test check_svd_reconstruction(A_ref, u, s, vh, atol(T), rtol(T)) + end + end +end + +@testset "svd full output shapes (full_matrices=true)" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + m, n = 6, 4 + A_ref = my_rand(T, m, n) + nda = cuNumeric.NDArray(A_ref) + u, s, vh = cuNumeric.svd(nda, true) + allowscalar() do + @test size(Array(u)) == (m, m) + @test size(Array(s)) == (min(m, n),) + @test size(Array(vh)) == (n, n) + end + end +end + +@testset "svd singular values non-negative and sorted" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 5, 5) + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda) + allowscalar() do + sv = Array(s) + @test all(sv .>= 0) + @test issorted(sv; rev=true) + end + end +end + +@testset "svd identity matrix" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + n = 4 + A_ref = Matrix{T}(I, n, n) + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda) + allowscalar() do + @test cuNumeric.compare(ones(T, n), s, atol(T), rtol(T)) + end + end +end + +@testset "svd rank-1 matrix" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + # outer product of two vectors: exactly one nonzero singular value + # 5×4 satisfies the M >= N constraint + v1 = T.(collect(1:5)) + v2 = T.(collect(1:4)) + A_ref = v1 * v2' + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda) + allowscalar() do + sv = Array(s) + @test sv[1] > atol(T) + @test all(sv[2:end] .< atol(T) * 10) + end + end +end + +@testset "svd agrees with LinearAlgebra.svd" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A_ref = my_rand(T, 5, 4) + nda = cuNumeric.NDArray(A_ref) + _, s, _ = cuNumeric.svd(nda, false) + ref = LinearAlgebra.svd(A_ref) + allowscalar() do + # Singular values must match; signs of singular vectors may differ + @test cuNumeric.compare(ref.S, s, atol(T), rtol(T)) + end + end +end + +@testset "svd 1d guard" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + v = cuNumeric.NDArray(T.(collect(1:4))) + @test_throws ArgumentError cuNumeric.svd(v) + end +end + +@testset "svd 3d guard" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A = cuNumeric.NDArray(my_rand(T, 3, 3, 3)) + @test_throws ArgumentError cuNumeric.svd(A) + end +end + +@testset "svd M < N guard" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SVD_TYPES) + A = cuNumeric.NDArray(my_rand(T, 3, 5)) + @test_throws ArgumentError cuNumeric.svd(A) + end +end \ No newline at end of file