Skip to content

Commit 3fc68b2

Browse files
add eta and docstring
1 parent 00de37e commit 3fc68b2

File tree

2 files changed

+67
-18
lines changed

2 files changed

+67
-18
lines changed

src/solver/linear/preconditioners/l1_gauss_seidel.jl

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ The preconditioner matrix $M_{ℓ_1}$ is defined as:
4545
M_{ℓ_1GS} = M_{HGS} + D^{ℓ_1} \\
4646
```
4747
Where $D^{ℓ_1}$ is a diagonal matrix with entries: $d_{ii}^{ℓ_1} = \sum_{j ∈ Ωⁱₒ} |a_{ij}|$, and $M_{HGS}$ is obtained when the diagonal partitions are chosen to be the Gauss–Seidel sweeps on $ A_{kk} $
48+
However, we use another convergant variant, which takes adavantage of the local estimation of θ ( $a_{ii} >= θ * d_{ii}$):
49+
```math
50+
M_{ℓ_1GS*} = M_{HGS} + D^{ℓ_1*}, \quad \text{where} \quad d_{ii}^{ℓ_1*} = \begin{cases} 0, & \text{if } a_{ii} \geq \eta d_{ii}^{ℓ_1}; \\ d_{ii}^{ℓ_1}/2, & \text{otherwise.} \end{cases}
51+
```
4852
4953
# Fields
5054
- `partitioning`: Encapsulates partitioning data (e.g. nparts, partsize, backend).
@@ -68,6 +72,9 @@ N = 128*16
6872
A = spdiagm(0 => 2 * ones(N), -1 => -ones(N-1), 1 => -ones(N-1))
6973
partsize = 16
7074
prec = builder(A, partsize)
75+
# NOTES:
76+
# 1. for symmetric A, that's not of type `Symmetric`, or `SparseMatrixCSR` (i.e. in CSR format), then it's recommended to set `isSymA=true` for better performance `prec = builder(A, partsize; isSymA=true)`.
77+
# 2. for any userdefined `η` value, use `prec = builder(A, partsize; η=1.2)`.
7178
```
7279
"""
7380
struct L1GSPreconditioner{Partitioning,VectorType}
@@ -94,11 +101,11 @@ struct L1GSPrecBuilder{DeviceType<:AbstractDevice}
94101
end
95102
end
96103

97-
(builder::L1GSPrecBuilder)(A::AbstractMatrix, partsize::Ti, isSymA::Bool = false) where {Ti<:Integer} =
98-
build_l1prec(builder, A, partsize, isSymA)
104+
(builder::L1GSPrecBuilder)(A::AbstractMatrix, partsize::Ti; isSymA::Bool = false, η = 1.5) where {Ti<:Integer} =
105+
build_l1prec(builder, A, partsize, isSymA, η)
99106

100-
(builder::L1GSPrecBuilder)(A::Symmetric, partsize::Ti) where {Ti<:Integer} =
101-
build_l1prec(builder, A, partsize, true)
107+
(builder::L1GSPrecBuilder)(A::Symmetric, partsize::Ti= 1.5) where {Ti<:Integer} =
108+
build_l1prec(builder, A, partsize, true, η)
102109

103110
struct DiagonalPartsIterator{Ti}
104111
size_A::Ti
@@ -117,18 +124,18 @@ struct DiagonalPartCache{Ti}
117124
end
118125

119126
## Preconditioner builder ##
120-
function build_l1prec(builder::L1GSPrecBuilder, A::MatrixType, partsize::Ti, isSymA::Bool) where {Ti<:Integer,MatrixType}
127+
function build_l1prec(builder::L1GSPrecBuilder, A::MatrixType, partsize::Ti, isSymA::Bool, η) where {Ti<:Integer,MatrixType}
121128
partsize == 0 && error("partsize must be greater than 0")
122-
_build_l1prec(builder, A, partsize, isSymA)
129+
_build_l1prec(builder, A, partsize, isSymA, η)
123130
end
124131

125-
function _build_l1prec(builder::L1GSPrecBuilder, _A::MatrixType, partsize::Ti, isSymA::Bool) where {Ti<:Integer,MatrixType}
132+
function _build_l1prec(builder::L1GSPrecBuilder, _A::MatrixType, partsize::Ti, isSymA::Bool, η) where {Ti<:Integer,MatrixType}
126133
# `nchunks` is either CPU cores or GPU blocks.
127134
# Each chunk will be assigned `nparts`, each of size `partsize`.
128135
# In GPU backend, `nchunks` is the number of blocks and `partsize` is the number of threads per block.
129136
A = get_data(_A) # for symmetric case
130137
partitioning = _blockpartitioning(builder, A, partsize)
131-
D_Dl1, SLbuffer = _precompute_blocks(A, partitioning, isSymA)
138+
D_Dl1, SLbuffer = _precompute_blocks(A, partitioning, isSymA, η)
132139
L1GSPreconditioner(partitioning, D_Dl1, SLbuffer)
133140
end
134141

@@ -330,7 +337,7 @@ _pack_strict_lower!(::AbstractMatrixSymmetry, ::CSRFormat, SLbuffer, A, start_id
330337
_pack_strict_lower_csr!(SLbuffer, getrowptr(A), colvals(A), getnzval(A), start_idx, end_idx, partsize, k)
331338

332339

333-
function _precompute_blocks(_A::AbstractSparseMatrix, partitioning::BlockPartitioning, isSymA::Bool)
340+
function _precompute_blocks(_A::AbstractSparseMatrix, partitioning::BlockPartitioning, isSymA::Bool, η)
334341
@timeit_debug "_precompute_blocks" begin
335342
# No assumptions on A, i.e. A here might be in either backend compatible format or not.
336343
# So we have to convert it to backend compatible format, if it is not already.
@@ -340,25 +347,25 @@ function _precompute_blocks(_A::AbstractSparseMatrix, partitioning::BlockPartiti
340347
(;partsize, nparts, nchunks, chunksize, backend) = partitioning
341348
A = adapt(backend, _A)
342349
N = size(A, 1)
350+
Tf = eltype(A)
343351

344-
D_Dl1 = adapt(backend, zeros(eltype(A), N)) # D + Dˡ
352+
η = convert(Tf, η)
353+
D_Dl1 = adapt(backend, zeros(Tf, N)) # D + Dˡ
345354
last_partsize = N - (nparts - 1) * partsize # size of the last partition
346355
SLbuffer_size = (partsize * (partsize - 1) * (nparts-1)) ÷ 2 + last_partsize * (last_partsize - 1) ÷ 2
347-
SLbuffer = adapt(backend, zeros(eltype(A), SLbuffer_size)) # strictly lower triangular part of all diagonal blocks stored in a 1D array
356+
SLbuffer = adapt(backend, zeros(Tf, SLbuffer_size)) # strictly lower triangular part of all diagonal blocks stored in a 1D array
348357
symA = isSymA ? SymmetricMatrix() : NonSymmetricMatrix()
349358

350359
ndrange = nchunks * chunksize
351360
@timeit_debug "kernel setup" kernel = _precompute_blocks_kernel!(backend, chunksize, ndrange)
352-
@timeit_debug "kernel execution" kernel(D_Dl1, SLbuffer, A, symA, partsize, nparts, nchunks, chunksize; ndrange=ndrange)
361+
@timeit_debug "kernel execution" kernel(D_Dl1, SLbuffer, A, symA, partsize, nparts, nchunks, chunksize, η; ndrange=ndrange)
353362
@timeit_debug "synchronize" synchronize(backend)
354363

355364
return D_Dl1, SLbuffer
356365
end
357366
end
358367

359-
const η = 1.5 # eq. (6.3)
360-
361-
@kernel function _precompute_blocks_kernel!(D_Dl1, SLbuffer, A, symA, partsize::Ti, nparts::Ti, nchunks::Ti, chunksize::Ti) where {Ti<:Integer}
368+
@kernel function _precompute_blocks_kernel!(D_Dl1, SLbuffer, A, symA, partsize::Ti, nparts::Ti, nchunks::Ti, chunksize::Ti, η) where {Ti<:Integer}
362369
initial_partition_idx = @index(Global)
363370
size_A = convert(Ti, size(A, 1))
364371
format_A = sparsemat_format_type(A)

test/test_preconditioners.jl

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ using KernelAbstractions
33
using JLD2: load
44
import Thunderbolt: ThreadedSparseMatrixCSR
55
using TimerOutputs
6+
using LinearAlgebra: Symmetric
67

78
##########################################
89
## L1 Gauss Seidel Preconditioner - CPU ##
910
##########################################
1011

1112
# Enable debug timings for Thunderbolt
12-
TimerOutputs.enable_debug_timings(Thunderbolt.Preconditioner)
13+
TimerOutputs.enable_debug_timings(Thunderbolt.Preconditioners)
1314

1415
function poisson_test_matrix(N)
1516
# Poisson's equation in 1D with Dirichlet BCs
@@ -22,7 +23,7 @@ function test_sym(testname, A, x, y_exp, D_Dl1_exp, SLbuffer_exp, partsize)
2223
total_ncores = 8 # Assuming 8 cores for testing
2324
for ncores in 1:total_ncores # testing for multiple cores to check that the answer is independent of the number of cores
2425
builder = L1GSPrecBuilder(PolyesterDevice(ncores))
25-
P = builder(A, partsize)
26+
P = A isa Symmetric ? builder(A, partsize) : builder(A, partsize; isSymA=true)
2627
@test P.D_Dl1 D_Dl1_exp
2728
@test P.SLbuffer SLbuffer_exp
2829
y = P \ x
@@ -74,7 +75,48 @@ end
7475
test_sym("CPU, CSR", B, x, y_exp, D_Dl1_exp, SLbuffer_exp, 2)
7576
C = ThreadedSparseMatrixCSR(B)
7677
test_sym("CPU, Threaded CSR", C, x, y_exp, D_Dl1_exp, SLbuffer_exp, 2)
78+
79+
@testset "η parameter" begin
80+
# Test with η = 2.0 (more strict than default 1.5)
81+
# For Poisson matrix: a_ii = 2, dl1_ii = 1 for all rows
82+
# Since a_ii = 2 >= η*dl1_ii = 2*1 = 2, all rows satisfy condition
83+
# Therefore dl1star_ii = 0 for all rows, D_Dl1 = a_ii = 2
84+
η = 2.0
85+
D_Dl1_exp = Float64.([2, 2, 2, 2, 2, 2, 2, 2, 2])
86+
SLbuffer_exp = Float64.([-1, -1, -1, -1])
87+
builder = L1GSPrecBuilder(PolyesterDevice(2))
88+
P = builder(A, 2; η=η)
89+
@test P.D_Dl1 D_Dl1_exp
90+
@test P.SLbuffer SLbuffer_exp
91+
92+
# Test with η = 3.0 (very strict)
93+
# a_ii = 2 < η*dl1_ii = 3*1 = 3, condition NOT satisfied
94+
# Therefore dl1star_ii = dl1_ii/2 = 1/2 = 0.5
95+
# D_Dl1 = a_ii + dl1star_ii = 2 + 0.5 = 2.5
96+
η = 3.0
97+
D_Dl1_exp = Float64.([2.0, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])
98+
P = builder(A, 2; η=η)
99+
@test P.D_Dl1 D_Dl1_exp
100+
@test P.SLbuffer SLbuffer_exp
77101

102+
# Test with η = 1.0 (less strict than default)
103+
# a_ii = 2 >= η*dl1_ii = 1*1 = 1, condition satisfied
104+
# Therefore dl1star_ii = 0
105+
# D_Dl1 = a_ii = 2
106+
η = 1.0
107+
D_Dl1_exp = Float64.([2, 2, 2, 2, 2, 2, 2, 2, 2])
108+
P = builder(A, 2; η=η)
109+
@test P.D_Dl1 D_Dl1_exp
110+
@test P.SLbuffer SLbuffer_exp
111+
112+
# Verify preconditioner still works correctly with different η
113+
y_exp = [0, 1 / 2, 1.0, 2.0, 2.0, 3.5, 3.0, 5.0, 4.0]
114+
η = 2.0
115+
P = builder(A, 2; η=η)
116+
y = P \ x
117+
@test y y_exp
118+
end
119+
78120

79121
@testset "Non-Symmetric CSC" begin
80122
A2 = copy(A)
@@ -114,7 +156,7 @@ end
114156

115157
@testset "Symmetric A" begin
116158
md = mdopen("HB/bcsstk15")
117-
A = md.A
159+
A = md.A # type here is Symmetric{Float64, SparseMatrixCSC{Float64, Int64}}
118160
b = ones(size(A, 1))
119161
test_l1gs_prec(A, b)
120162
end

0 commit comments

Comments
 (0)