add eta and docstring

Abdelrahman912 · Abdelrahman912 · commit 3fc68b2206e9 · 2025-12-03T19:05:22.000+01:00
diff --git a/src/solver/linear/preconditioners/l1_gauss_seidel.jl b/src/solver/linear/preconditioners/l1_gauss_seidel.jl
@@ -45,6 +45,10 @@ The preconditioner matrix $M_{ℓ_1}$  is defined as:
 M_{ℓ_1GS} = M_{HGS} + D^{ℓ_1} \\
 ```
 Where $D^{ℓ_1}$ is a diagonal matrix with entries: $d_{ii}^{ℓ_1} = \sum_{j ∈ Ωⁱₒ} |a_{ij}|$, and $M_{HGS}$ is obtained when the diagonal partitions are chosen to be the Gauss–Seidel sweeps on $ A_{kk} $
+However, we use another convergent variant, which takes advantage of the local estimate of θ ($a_{ii} \geq θ d_{ii}^{ℓ_1}$):
+```math
+M_{ℓ_1GS*} = M_{HGS} + D^{ℓ_1*}, \quad \text{where} \quad d_{ii}^{ℓ_1*} = \begin{cases} 0, & \text{if } a_{ii} \geq \eta d_{ii}^{ℓ_1}; \\ d_{ii}^{ℓ_1}/2, & \text{otherwise.} \end{cases}
+```
 
 # Fields
 - `partitioning`: Encapsulates partitioning data (e.g. nparts, partsize, backend).
@@ -68,6 +72,9 @@ N = 128*16
 A = spdiagm(0 => 2 * ones(N), -1 => -ones(N-1), 1 => -ones(N-1))
 partsize = 16
 prec = builder(A, partsize)
+# NOTES:
+# 1. If A is symmetric but is neither wrapped in `Symmetric` nor a `SparseMatrixCSR` (i.e. in CSR format), it is recommended to set `isSymA=true` for better performance: `prec = builder(A, partsize; isSymA=true)`.
+# 2. To use a user-defined `η` value, pass it as a keyword argument: `prec = builder(A, partsize; η=1.2)`.
 ```
 """
 struct L1GSPreconditioner{Partitioning,VectorType}
@@ -94,11 +101,11 @@ struct L1GSPrecBuilder{DeviceType<:AbstractDevice}
     end
 end
 
-(builder::L1GSPrecBuilder)(A::AbstractMatrix, partsize::Ti, isSymA::Bool = false) where {Ti<:Integer} =
-    build_l1prec(builder, A, partsize, isSymA)
+(builder::L1GSPrecBuilder)(A::AbstractMatrix, partsize::Ti; isSymA::Bool = false, η = 1.5) where {Ti<:Integer} =
+    build_l1prec(builder, A, partsize, isSymA, η)
 
-(builder::L1GSPrecBuilder)(A::Symmetric, partsize::Ti) where {Ti<:Integer} =
-    build_l1prec(builder, A, partsize, true)
+(builder::L1GSPrecBuilder)(A::Symmetric, partsize::Ti;η = 1.5) where {Ti<:Integer} =
+    build_l1prec(builder, A, partsize, true, η)
 
 struct DiagonalPartsIterator{Ti}
     size_A::Ti
@@ -117,18 +124,18 @@ struct DiagonalPartCache{Ti}
 end
 
 ## Preconditioner builder ##
-function build_l1prec(builder::L1GSPrecBuilder, A::MatrixType, partsize::Ti, isSymA::Bool) where {Ti<:Integer,MatrixType}
+function build_l1prec(builder::L1GSPrecBuilder, A::MatrixType, partsize::Ti, isSymA::Bool, η) where {Ti<:Integer,MatrixType}
     partsize == 0 && error("partsize must be greater than 0")
-    _build_l1prec(builder, A, partsize, isSymA)
+    _build_l1prec(builder, A, partsize, isSymA, η)
 end
 
-function _build_l1prec(builder::L1GSPrecBuilder, _A::MatrixType, partsize::Ti, isSymA::Bool) where {Ti<:Integer,MatrixType}
+function _build_l1prec(builder::L1GSPrecBuilder, _A::MatrixType, partsize::Ti, isSymA::Bool, η) where {Ti<:Integer,MatrixType}
     # `nchunks` is either CPU cores or GPU blocks.
     # Each chunk will be assigned `nparts`, each of size `partsize`.
     # In GPU backend, `nchunks` is the number of blocks and `partsize` is the number of threads per block.
     A = get_data(_A) # for symmetric case
     partitioning = _blockpartitioning(builder, A, partsize)
-    D_Dl1, SLbuffer = _precompute_blocks(A, partitioning, isSymA)
+    D_Dl1, SLbuffer = _precompute_blocks(A, partitioning, isSymA, η)
     L1GSPreconditioner(partitioning, D_Dl1, SLbuffer)
 end
 
@@ -330,7 +337,7 @@ _pack_strict_lower!(::AbstractMatrixSymmetry, ::CSRFormat, SLbuffer, A, start_id
     _pack_strict_lower_csr!(SLbuffer, getrowptr(A), colvals(A), getnzval(A), start_idx, end_idx, partsize, k)
 
 
-function _precompute_blocks(_A::AbstractSparseMatrix, partitioning::BlockPartitioning, isSymA::Bool)
+function _precompute_blocks(_A::AbstractSparseMatrix, partitioning::BlockPartitioning, isSymA::Bool, η)
     @timeit_debug "_precompute_blocks" begin
         # No assumptions on A, i.e. A here might be in either backend compatible format or not.
         # So we have to convert it to backend compatible format, if it is not already.
@@ -340,25 +347,25 @@ function _precompute_blocks(_A::AbstractSparseMatrix, partitioning::BlockPartiti
         (;partsize, nparts, nchunks, chunksize, backend) = partitioning
         A = adapt(backend, _A)
         N = size(A, 1)
+        Tf = eltype(A)
 
-        D_Dl1 = adapt(backend, zeros(eltype(A), N)) # D + Dˡ
+        η = convert(Tf, η)
+        D_Dl1 = adapt(backend, zeros(Tf, N)) # D + Dˡ
         last_partsize = N - (nparts - 1) * partsize # size of the last partition
         SLbuffer_size = (partsize * (partsize - 1) * (nparts-1)) ÷ 2 +  last_partsize * (last_partsize - 1) ÷ 2
-        SLbuffer = adapt(backend, zeros(eltype(A), SLbuffer_size)) # strictly lower triangular part of all diagonal blocks stored in a 1D array
+        SLbuffer = adapt(backend, zeros(Tf, SLbuffer_size)) # strictly lower triangular part of all diagonal blocks stored in a 1D array
         symA = isSymA ? SymmetricMatrix() : NonSymmetricMatrix()
 
         ndrange = nchunks * chunksize
         @timeit_debug "kernel setup" kernel = _precompute_blocks_kernel!(backend, chunksize, ndrange)
-        @timeit_debug "kernel execution" kernel(D_Dl1, SLbuffer, A, symA, partsize, nparts, nchunks, chunksize; ndrange=ndrange)
+        @timeit_debug "kernel execution" kernel(D_Dl1, SLbuffer, A, symA, partsize, nparts, nchunks, chunksize, η; ndrange=ndrange)
         @timeit_debug "synchronize" synchronize(backend)
 
         return D_Dl1, SLbuffer
     end
 end
 
-const η = 1.5 # eq. (6.3)
-
-@kernel function _precompute_blocks_kernel!(D_Dl1, SLbuffer, A, symA, partsize::Ti, nparts::Ti, nchunks::Ti, chunksize::Ti) where {Ti<:Integer}
+@kernel function _precompute_blocks_kernel!(D_Dl1, SLbuffer, A, symA, partsize::Ti, nparts::Ti, nchunks::Ti, chunksize::Ti, η) where {Ti<:Integer}
     initial_partition_idx = @index(Global)
     size_A = convert(Ti, size(A, 1))
     format_A = sparsemat_format_type(A)
diff --git a/test/test_preconditioners.jl b/test/test_preconditioners.jl
@@ -3,13 +3,14 @@ using KernelAbstractions
 using JLD2: load
 import Thunderbolt: ThreadedSparseMatrixCSR
 using TimerOutputs
+using LinearAlgebra: Symmetric
 
 ##########################################
 ## L1 Gauss Seidel Preconditioner - CPU ##
 ##########################################
 
 # Enable debug timings for Thunderbolt
-TimerOutputs.enable_debug_timings(Thunderbolt.Preconditioner)
+TimerOutputs.enable_debug_timings(Thunderbolt.Preconditioners)
 
 function poisson_test_matrix(N)
     # Poisson's equation in 1D with Dirichlet BCs
@@ -22,7 +23,7 @@ function test_sym(testname, A, x, y_exp, D_Dl1_exp, SLbuffer_exp, partsize)
         total_ncores = 8 # Assuming 8 cores for testing
         for ncores in 1:total_ncores # testing for multiple cores to check that the answer is independent of the number of cores
             builder = L1GSPrecBuilder(PolyesterDevice(ncores))
-            P = builder(A, partsize)
+            P = A isa Symmetric ? builder(A, partsize) : builder(A, partsize; isSymA=true)
             @test P.D_Dl1 ≈ D_Dl1_exp
             @test P.SLbuffer ≈ SLbuffer_exp
             y = P \ x
@@ -74,7 +75,48 @@ end
         test_sym("CPU, CSR", B, x, y_exp, D_Dl1_exp, SLbuffer_exp, 2)
         C = ThreadedSparseMatrixCSR(B)
         test_sym("CPU, Threaded CSR", C, x, y_exp, D_Dl1_exp, SLbuffer_exp, 2)
+        
+        @testset "η parameter" begin
+            # Test with η = 2.0 (more strict than default 1.5)
+            # For Poisson matrix: a_ii = 2, dl1_ii = 1 for all rows except the first (dl1_11 = 0, cf. the η = 3.0 expectation below)
+            # Since a_ii = 2 >= η*dl1_ii = 2*1 = 2, all rows satisfy condition
+            # Therefore dl1star_ii = 0 for all rows, D_Dl1 = a_ii = 2
+            η = 2.0
+            D_Dl1_exp = Float64.([2, 2, 2, 2, 2, 2, 2, 2, 2])
+            SLbuffer_exp = Float64.([-1, -1, -1, -1])
+            builder = L1GSPrecBuilder(PolyesterDevice(2))
+            P = builder(A, 2; η=η)
+            @test P.D_Dl1 ≈ D_Dl1_exp
+            @test P.SLbuffer ≈ SLbuffer_exp
+
+            # Test with η = 3.0 (very strict)
+            # a_ii = 2 < η*dl1_ii = 3*1 = 3, condition NOT satisfied
+            # Therefore dl1star_ii = dl1_ii/2 = 1/2 = 0.5
+            # D_Dl1 = a_ii + dl1star_ii = 2 + 0.5 = 2.5 (row 1 stays at 2: presumably it has no off-partition entry, so dl1_11 = 0)
+            η = 3.0
+            D_Dl1_exp = Float64.([2.0, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])
+            P = builder(A, 2; η=η)
+            @test P.D_Dl1 ≈ D_Dl1_exp
+            @test P.SLbuffer ≈ SLbuffer_exp
 
+            # Test with η = 1.0 (less strict than default)
+            # a_ii = 2 >= η*dl1_ii = 1*1 = 1, condition satisfied
+            # Therefore dl1star_ii = 0
+            # D_Dl1 = a_ii = 2
+            η = 1.0
+            D_Dl1_exp = Float64.([2, 2, 2, 2, 2, 2, 2, 2, 2])
+            P = builder(A, 2; η=η)
+            @test P.D_Dl1 ≈ D_Dl1_exp
+            @test P.SLbuffer ≈ SLbuffer_exp
+
+            # Verify preconditioner still works correctly with different η
+            y_exp = [0, 1 / 2, 1.0, 2.0, 2.0, 3.5, 3.0, 5.0, 4.0]
+            η = 2.0
+            P = builder(A, 2; η=η)
+            y = P \ x
+            @test y ≈ y_exp
+        end
+            
 
         @testset "Non-Symmetric CSC" begin
             A2 = copy(A)
@@ -114,7 +156,7 @@ end
 
         @testset "Symmetric A" begin
             md = mdopen("HB/bcsstk15")
-            A = md.A
+            A = md.A # type here is Symmetric{Float64, SparseMatrixCSC{Float64, Int64}}
             b = ones(size(A, 1))
             test_l1gs_prec(A, b)
         end