@@ -45,6 +45,10 @@ The preconditioner matrix $M_{ℓ_1}$ is defined as:
4545M_{ℓ_1GS} = M_{HGS} + D^{ℓ_1} \\
4646```
4747Where $D^{ℓ_1}$ is a diagonal matrix with entries: $d_{ii}^{ℓ_1} = \sum_{j ∈ Ωⁱₒ} |a_{ij}|$, and $M_{HGS}$ is obtained when the diagonal partitions are chosen to be the Gauss–Seidel sweeps on $A_{kk}$.
48+ However, we use another convergent variant, which takes advantage of the local estimation of θ ($a_{ii} \geq θ \, d_{ii}^{ℓ_1}$):
49+ ```math
50+ M_{ℓ_1GS*} = M_{HGS} + D^{ℓ_1*}, \quad \text{where} \quad d_{ii}^{ℓ_1*} = \begin{cases} 0, & \text{if } a_{ii} \geq \eta d_{ii}^{ℓ_1}; \\ d_{ii}^{ℓ_1}/2, & \text{otherwise.} \end{cases}
51+ ```
4852
4953# Fields
5054- `partitioning`: Encapsulates partitioning data (e.g. nparts, partsize, backend).
@@ -68,6 +72,9 @@ N = 128*16
6872A = spdiagm(0 => 2 * ones(N), -1 => -ones(N-1), 1 => -ones(N-1))
6973partsize = 16
7074prec = builder(A, partsize)
75+ # NOTES:
76+ # 1. For a symmetric A that is not of type `Symmetric`, or `SparseMatrixCSR` (i.e. in CSR format), it is recommended to set `isSymA=true` for better performance: `prec = builder(A, partsize; isSymA=true)`.
77+ # 2. To use a user-defined `η` value, pass it as a keyword argument, e.g. `prec = builder(A, partsize; η=1.2)`.
7178```
7279"""
7380struct L1GSPreconditioner{Partitioning,VectorType}
@@ -94,11 +101,11 @@ struct L1GSPrecBuilder{DeviceType<:AbstractDevice}
94101 end
95102end
96103
97- (builder:: L1GSPrecBuilder )(A:: AbstractMatrix , partsize:: Ti , isSymA:: Bool = false ) where {Ti<: Integer } =
98- build_l1prec (builder, A, partsize, isSymA)
104+ (builder:: L1GSPrecBuilder )(A:: AbstractMatrix , partsize:: Ti ; isSymA:: Bool = false , η = 1.5 ) where {Ti<: Integer } =
105+ build_l1prec (builder, A, partsize, isSymA, η )
99106
100- (builder:: L1GSPrecBuilder )(A:: Symmetric , partsize:: Ti ) where {Ti<: Integer } =
101- build_l1prec (builder, A, partsize, true )
107+ (builder:: L1GSPrecBuilder )(A:: Symmetric , partsize:: Ti ;η = 1.5 ) where {Ti<: Integer } =
108+ build_l1prec (builder, A, partsize, true , η )
102109
103110struct DiagonalPartsIterator{Ti}
104111 size_A:: Ti
@@ -117,18 +124,18 @@ struct DiagonalPartCache{Ti}
117124end
118125
119126# # Preconditioner builder ##
120- function build_l1prec (builder:: L1GSPrecBuilder , A:: MatrixType , partsize:: Ti , isSymA:: Bool ) where {Ti<: Integer ,MatrixType}
127+ function build_l1prec (builder:: L1GSPrecBuilder , A:: MatrixType , partsize:: Ti , isSymA:: Bool , η ) where {Ti<: Integer ,MatrixType}
121128 partsize == 0 && error (" partsize must be greater than 0" )
122- _build_l1prec (builder, A, partsize, isSymA)
129+ _build_l1prec (builder, A, partsize, isSymA, η )
123130end
124131
125- function _build_l1prec (builder:: L1GSPrecBuilder , _A:: MatrixType , partsize:: Ti , isSymA:: Bool ) where {Ti<: Integer ,MatrixType}
132+ function _build_l1prec (builder:: L1GSPrecBuilder , _A:: MatrixType , partsize:: Ti , isSymA:: Bool , η ) where {Ti<: Integer ,MatrixType}
126133 # `nchunks` is either CPU cores or GPU blocks.
127134 # Each chunk will be assigned `nparts`, each of size `partsize`.
128135 # In GPU backend, `nchunks` is the number of blocks and `partsize` is the number of threads per block.
129136 A = get_data (_A) # for symmetric case
130137 partitioning = _blockpartitioning (builder, A, partsize)
131- D_Dl1, SLbuffer = _precompute_blocks (A, partitioning, isSymA)
138+ D_Dl1, SLbuffer = _precompute_blocks (A, partitioning, isSymA, η )
132139 L1GSPreconditioner (partitioning, D_Dl1, SLbuffer)
133140end
134141
@@ -330,7 +337,7 @@ _pack_strict_lower!(::AbstractMatrixSymmetry, ::CSRFormat, SLbuffer, A, start_id
330337 _pack_strict_lower_csr! (SLbuffer, getrowptr (A), colvals (A), getnzval (A), start_idx, end_idx, partsize, k)
331338
332339
333- function _precompute_blocks (_A:: AbstractSparseMatrix , partitioning:: BlockPartitioning , isSymA:: Bool )
340+ function _precompute_blocks (_A:: AbstractSparseMatrix , partitioning:: BlockPartitioning , isSymA:: Bool , η )
334341 @timeit_debug " _precompute_blocks" begin
335342 # No assumptions on A, i.e. A here might be in either backend compatible format or not.
336343 # So we have to convert it to backend compatible format, if it is not already.
@@ -340,25 +347,25 @@ function _precompute_blocks(_A::AbstractSparseMatrix, partitioning::BlockPartiti
340347 (;partsize, nparts, nchunks, chunksize, backend) = partitioning
341348 A = adapt (backend, _A)
342349 N = size (A, 1 )
350+ Tf = eltype (A)
343351
344- D_Dl1 = adapt (backend, zeros (eltype (A), N)) # D + Dˡ
352+ η = convert (Tf, η)
353+ D_Dl1 = adapt (backend, zeros (Tf, N)) # D + Dˡ
345354 last_partsize = N - (nparts - 1 ) * partsize # size of the last partition
346355 SLbuffer_size = (partsize * (partsize - 1 ) * (nparts- 1 )) ÷ 2 + last_partsize * (last_partsize - 1 ) ÷ 2
347- SLbuffer = adapt (backend, zeros (eltype (A) , SLbuffer_size)) # strictly lower triangular part of all diagonal blocks stored in a 1D array
356+ SLbuffer = adapt (backend, zeros (Tf , SLbuffer_size)) # strictly lower triangular part of all diagonal blocks stored in a 1D array
348357 symA = isSymA ? SymmetricMatrix () : NonSymmetricMatrix ()
349358
350359 ndrange = nchunks * chunksize
351360 @timeit_debug " kernel setup" kernel = _precompute_blocks_kernel! (backend, chunksize, ndrange)
352- @timeit_debug " kernel execution" kernel (D_Dl1, SLbuffer, A, symA, partsize, nparts, nchunks, chunksize; ndrange= ndrange)
361+ @timeit_debug " kernel execution" kernel (D_Dl1, SLbuffer, A, symA, partsize, nparts, nchunks, chunksize, η ; ndrange= ndrange)
353362 @timeit_debug " synchronize" synchronize (backend)
354363
355364 return D_Dl1, SLbuffer
356365 end
357366end
358367
359- const η = 1.5 # eq. (6.3)
360-
361- @kernel function _precompute_blocks_kernel! (D_Dl1, SLbuffer, A, symA, partsize:: Ti , nparts:: Ti , nchunks:: Ti , chunksize:: Ti ) where {Ti<: Integer }
368+ @kernel function _precompute_blocks_kernel! (D_Dl1, SLbuffer, A, symA, partsize:: Ti , nparts:: Ti , nchunks:: Ti , chunksize:: Ti , η) where {Ti<: Integer }
362369 initial_partition_idx = @index (Global)
363370 size_A = convert (Ti, size (A, 1 ))
364371 format_A = sparsemat_format_type (A)
0 commit comments