From bff90fd5d46e6229e6133040c7047cc081e18176 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 13 Mar 2026 21:59:56 +0100 Subject: [PATCH 1/9] Add GG Anderson Mumps Solver --- .../LinearAlgebra/Solvers/coo_mumps_solver.h | 97 +++++++++ src/CMakeLists.txt | 8 + .../Solvers/coo_mumps_solver.cpp | 202 ++++++++++++++++++ tests/CMakeLists.txt | 1 + .../Solvers/coo_mumps_solver.cpp | 98 +++++++++ 5 files changed, 406 insertions(+) create mode 100644 include/LinearAlgebra/Solvers/coo_mumps_solver.h create mode 100644 src/LinearAlgebra/Solvers/coo_mumps_solver.cpp create mode 100644 tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp diff --git a/include/LinearAlgebra/Solvers/coo_mumps_solver.h b/include/LinearAlgebra/Solvers/coo_mumps_solver.h new file mode 100644 index 00000000..1a0d72f8 --- /dev/null +++ b/include/LinearAlgebra/Solvers/coo_mumps_solver.h @@ -0,0 +1,97 @@ +#pragma once + +#ifdef GMGPOLAR_USE_MUMPS + + #include "dmumps_c.h" + #include "../../LinearAlgebra/Matrix/coo_matrix.h" + #include "../../LinearAlgebra/Vector/vector.h" + +/* + * CooMumpsSolver - sparse direct solver backed by MUMPS (MUltifrontal Massively Parallel Solver). + * + * Accepts a matrix in COO (Coordinate) format and solves systems of the form Ax = b. + * The solver takes ownership of the matrix, as MUMPS may reorder and modify it in place + * during the analysis and factorization phases. + * + * Usage: + * 1. Construct with a fully assembled COO matrix (all non-zeros must be present, + * including both triangles if the matrix is symmetric). + * 2. Call solve(rhs) one or more times. The right-hand side vector is overwritten + * in place with the solution x on return. + * + * Symmetry: + * If the matrix is marked symmetric (is_symmetric = true), it is assumed to be + * symmetric positive definite and MUMPS is configured accordingly (SYM = 1). + * In that case, only the upper triangle is passed to MUMPS internally. + * This assumption is valid within GMGPolar because all system matrices arise from + * an invertible domain mapping, guaranteeing positive definiteness. + * + * Thread safety: + * Not thread-safe. Each instance manages its own MUMPS communicator and + * must not be shared across threads. + */ +class CooMumpsSolver +{ +public: + explicit CooMumpsSolver(SparseMatrixCOO matrix); + ~CooMumpsSolver(); + + // rhs is overwritten in-place with the solution on return. + void solve(Vector& rhs); + +private: + void initialize(); + void finalize(); + void configureICNTL(); + void configureCNTL(); + + /* ------------------------------------------------ */ + /* MUMPS uses 1-based indexing in the documentation */ + /* ------------------------------------------------ */ + int& ICNTL(int i) + { + return mumps_solver_.icntl[i - 1]; + } + double& CNTL(int i) + { + return mumps_solver_.cntl[i - 1]; + } + int& INFOG(int i) + { + return mumps_solver_.infog[i - 1]; + } + + SparseMatrixCOO extractUpperTriangle(const SparseMatrixCOO& matrix) const; + + /* ----------------------------------- */ + /* MUMPS jobs and constant definitions */ + /* ----------------------------------- */ + static constexpr int USE_COMM_WORLD = -987654; + static constexpr int PAR_NOT_PARALLEL = 0; + static constexpr int PAR_PARALLEL = 1; + + static constexpr int JOB_INIT = -1; + static constexpr int JOB_END = -2; + static constexpr int JOB_REMOVE_SAVED_DATA = -3; + static constexpr int JOB_FREE_INTERNAL_DATA = -4; + static constexpr int JOB_SUPPRESS_OOC_FILES = -200; + + static constexpr int JOB_ANALYSIS_PHASE = 1; + static constexpr int JOB_FACTORIZATION_PHASE = 2; + static constexpr int JOB_COMPUTE_SOLUTION = 3; + static constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4; + static constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5; + static constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6; + static constexpr int JOB_SAVE_INTERNAL_DATA = 7; + static constexpr int JOB_RESTORE_INTERNAL_DATA = 8; + static constexpr int JOB_DISTRIBUTE_RHS = 9; + + static constexpr int SYM_UNSYMMETRIC = 0; + static constexpr int SYM_POSITIVE_DEFINITE = 1; + static constexpr int SYM_GENERAL_SYMMETRIC = 2; + + SparseMatrixCOO matrix_; + DMUMPS_STRUC_C mumps_solver_ = {}; +}; + +#endif // GMGPOLAR_USE_MUMPS diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 96abf3f6..b0d90afd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,13 @@ set(POLAR_GRID_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/PolarGrid/anisotropic_division.cpp ) +# Gather all source files +# file(GLOB_RECURSE LINEAR_ALGEBRA_SOURCES +# ${CMAKE_CURRENT_SOURCE_DIR}/LinearAlgebra/Solvers/*.cpp) +set(LINEAR_ALGEBRA_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/LinearAlgebra/Solvers/coo_mumps_solver.cpp +) + # file(GLOB_RECURSE GMG_POLAR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/GMGPolar/*.cpp) set(GMG_POLAR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/GMGPolar/gmgpolar.cpp @@ -47,6 +54,7 @@ set(CONFIG_PARSER_SOURCES # Create the main library add_library(GMGPolarLib STATIC ${POLAR_GRID_SOURCES} + ${LINEAR_ALGEBRA_SOURCES} ${GMG_POLAR_SOURCES} ${STENCIL_SOURCES} ${INTERPOLATION_SOURCES} diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp new file mode 100644 index 00000000..daa213ad --- /dev/null +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -0,0 +1,202 @@ +#include "../../../include/LinearAlgebra/Solvers/coo_mumps_solver.h" + +#ifdef GMGPOLAR_USE_MUMPS + + #include + #include + #include + +CooMumpsSolver::CooMumpsSolver(SparseMatrixCOO matrix) +{ + if (matrix.is_symmetric()) { + matrix_ = extractUpperTriangle(matrix); + } + else { + matrix_ = std::move(matrix); + } + + initialize(); +} + +CooMumpsSolver::~CooMumpsSolver() +{ + finalize(); +} + +void CooMumpsSolver::solve(Vector& rhs) +{ + assert(std::ssize(rhs) == mumps_solver_.n); + + mumps_solver_.job = JOB_COMPUTE_SOLUTION; + mumps_solver_.nrhs = 1; + mumps_solver_.lrhs = mumps_solver_.n; + mumps_solver_.rhs = rhs.data(); + + dmumps_c(&mumps_solver_); + + if (INFOG(1) != 0) { + std::cerr << "MUMPS reported an error during solution phase " << "(INFOG(1) = " << INFOG(1) << ").\n"; + } +} + +void CooMumpsSolver::initialize() +{ + assert(matrix_.rows() == matrix_.columns()); + + /* + * MUMPS uses 1-based indexing; convert from 0-based. + */ + for (int i = 0; i < matrix_.non_zero_size(); i++) { + matrix_.row_index(i) += 1; + matrix_.col_index(i) += 1; + } + + mumps_solver_.job = JOB_INIT; + mumps_solver_.par = PAR_PARALLEL; + mumps_solver_.sym = matrix_.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC; + mumps_solver_.comm_fortran = USE_COMM_WORLD; + + dmumps_c(&mumps_solver_); + + configureICNTL(); + configureCNTL(); + + mumps_solver_.job = JOB_ANALYSIS_AND_FACTORIZATION; + mumps_solver_.n = matrix_.rows(); + mumps_solver_.nz = matrix_.non_zero_size(); + mumps_solver_.irn = matrix_.row_indices_data(); + mumps_solver_.jcn = matrix_.column_indices_data(); + mumps_solver_.a = matrix_.values_data(); + + dmumps_c(&mumps_solver_); + + if (INFOG(1) != 0) { + std::cerr << "MUMPS reported an error during analysis/factorization " << "(INFOG(1) = " << INFOG(1) << ").\n"; + return; + } + + if (mumps_solver_.sym == SYM_POSITIVE_DEFINITE && INFOG(12) != 0) { + std::cerr << "Matrix declared positive definite, " + << "but negative pivots were encountered during factorization " << "(INFOG(12) = " << INFOG(12) + << ").\n"; + } + + if (INFOG(1) != 0) { + std::cerr << "MUMPS reported an error during analysis/factorization " + << "(INFOG(1) = " << INFOG(1) << ").\n"; + return; + } +} + +void CooMumpsSolver::finalize() +{ + mumps_solver_.job = JOB_END; + dmumps_c(&mumps_solver_); +} + +void CooMumpsSolver::configureICNTL() +{ + // All ICNTL values are left at their defaults unless noted below. + // ICNTL(1) = 0: suppress error message output + // ICNTL(3) = 0: suppress global information output + // ICNTL(6) = 7: automatically choose permutation/scaling strategy + // ICNTL(7) = 5: use METIS for fill-reducing ordering + // ICNTL(48) = 0: disable tree parallelism (conflicts with OpenMP in newer MUMPS releases) + + ICNTL(1) = 0; // Output stream for error messages + ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process + ICNTL(3) = 0; // Output stream for global information, collected on the host + ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages + ICNTL(5) = 0; // Controls the matrix input format + ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scales the matrix + ICNTL(7) = 5; // Symmetric permutation (ordering) to determine pivot order for sequential analysis + ICNTL(8) = 77; // Scaling strategy + ICNTL(9) = 1; // Computes the solution using A or A^T + ICNTL(10) = 0; // Iterative refinement steps applied to the computed solution + ICNTL(11) = 0; // Error analysis statistics + ICNTL(12) = 0; // Ordering strategy for symmetric matrices + ICNTL(13) = 0; // Controls the parallelism of the root node + ICNTL(14) = matrix_.is_symmetric() ? 5 : 20; // Percentage increase in estimated working space + ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + // ICNTL(17) does not exist + ICNTL(18) = 0; // Strategy for the distributed input matrix + ICNTL(19) = 0; // Computes the Schur complement matrix + ICNTL(20) = 0; // Format of the right-hand sides (dense, sparse, or distributed) + ICNTL(21) = 0; // Distribution of the solution vectors (centralized or distributed) + ICNTL(22) = 0; // In-core/out-of-core (OOC) factorization and solve + ICNTL(23) = 0; // Maximum working memory in MegaBytes per working process + ICNTL(24) = 0; // Detection of null pivot rows + ICNTL(25) = 0; // Solution of a deficient matrix and null space basis computation + ICNTL(26) = 0; // Solution phase when Schur complement has been computed + ICNTL(27) = -32; // Blocking size for multiple right-hand sides + ICNTL(28) = 0; // Sequential or parallel computation of the ordering + ICNTL(29) = 0; // Parallel ordering tool when ICNTL(28)=1 + ICNTL(30) = 0; // User-specified entries in the inverse A^-1 + ICNTL(31) = 0; // Which factors may be discarded during factorization + ICNTL(32) = 0; // Forward elimination of the right-hand sides during factorization + ICNTL(33) = 0; // Computes the determinant of the input matrix + ICNTL(34) = 0; // Conservation of OOC files during JOB=-3 + ICNTL(35) = 0; // Activation of the BLR feature + ICNTL(36) = 0; // Choice of BLR factorization variant + ICNTL(37) = 0; // BLR compression of the contribution blocks + ICNTL(38) = 600; // Estimated compression rate of LU factors + ICNTL(39) = 500; // Estimated compression rate of contribution blocks + // ICNTL(40-47) do not exist + ICNTL(48) = 0; // Multithreading with tree parallelism + ICNTL(49) = 0; // Compact workarray id%S at end of factorization phase + // ICNTL(50-55) do not exist + ICNTL(56) = 0; // Detects pseudo-singularities; rank-revealing factorization of root node + // ICNTL(57) does not exist + ICNTL(58) = 2; // Options for symbolic factorization + // ICNTL(59-60) do not exist +} + +void CooMumpsSolver::configureCNTL() +{ + // All CNTL values are left at their defaults unless noted below. + + CNTL(1) = -1.0; // Relative threshold for numerical pivoting + CNTL(2) = -1.0; // Stopping criterion for iterative refinement + CNTL(3) = 0.0; // Threshold for null pivot row detection + CNTL(4) = -1.0; // Threshold for static pivoting + CNTL(5) = 0.0; // Fixation for null pivots (effective only when null pivot detection is active) + // CNTL(6) does not exist + CNTL(7) = 0.0; // Dropping parameter precision for BLR compression + // CNTL(8-15) do not exist +} + +SparseMatrixCOO CooMumpsSolver::extractUpperTriangle(const SparseMatrixCOO& matrix) const +{ + const int full_nnz = matrix.non_zero_size(); + const int rows = matrix.rows(); + const int cols = matrix.columns(); + + int symmetric_nnz = 0; + + for (int i = 0; i < full_nnz; ++i) { + if (matrix.row_index(i) <= matrix.col_index(i)) + symmetric_nnz++; + } + + SparseMatrixCOO sym(rows, cols, symmetric_nnz); + sym.is_symmetric(true); + + int current = 0; + + for (int i = 0; i < full_nnz; ++i) { + const int row = matrix.row_index(i); + const int column = matrix.col_index(i); + + if (row <= column) { + sym.row_index(current) = row; + sym.col_index(current) = column; + sym.value(current) = matrix.value(i); + current++; + } + } + + return sym; +} + +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e02b7577..038278b1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -12,6 +12,7 @@ add_executable(gmgpolar_tests LinearAlgebra/Matrix/coo_matrix.cpp LinearAlgebra/Matrix/csr_matrix.cpp LinearAlgebra/Solvers/csr_lu_solver.cpp + LinearAlgebra/Solvers/coo_mumps_solver.cpp LinearAlgebra/Solvers/tridiagonal_solver.cpp PolarGrid/polargrid.cpp Interpolation/prolongation.cpp diff --git a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp new file mode 100644 index 00000000..eb3344c9 --- /dev/null +++ b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -0,0 +1,98 @@ +#ifdef GMGPOLAR_USE_MUMPS + + #include + + #include "../../../include/LinearAlgebra/Vector/vector.h" + #include "../../../include/LinearAlgebra/Matrix/coo_matrix.h" + #include "../../../include/LinearAlgebra/Solvers/coo_mumps_solver.h" + +// ----------------------------------------------------------------------- +// Test 1: General (non-symmetric) 4x4 system +// +// Matrix A (non-symmetric): +// [ 1 0 2 0 ] +// [ 3 0 4 5 ] +// [ 0 6 7 0 ] +// [ 0 8 0 9 ] +// +// RHS b = [2, 4, 6, 8]^T +// ----------------------------------------------------------------------- +TEST(CooMumpsSolverTest, GeneralNonSymmetric4x4) +{ + using triplet = SparseMatrixCOO::triplet_type; + + // All non-zero entries (0-based row/col indices) + std::vector entries = {{0, 0, 1.0}, {0, 2, 2.0}, {1, 0, 3.0}, {1, 2, 4.0}, {1, 3, 5.0}, + {2, 1, 6.0}, {2, 2, 7.0}, {3, 1, 8.0}, {3, 3, 9.0}}; + + SparseMatrixCOO mat(4, 4, entries); + mat.is_symmetric(false); + + CooMumpsSolver solver(std::move(mat)); + + Vector rhs("rhs", 4); + rhs(0) = 2.0; + rhs(1) = 4.0; + rhs(2) = 6.0; + rhs(3) = 8.0; + + solver.solve(rhs); + + Vector solution("solution", 4); + solution(0) = 140.0 / 43.0; + solution(1) = 149.0 / 86.0; + solution(2) = -27.0 / 43.0; + solution(3) = -28.0 / 43.0; + + const double tol = 1e-10; + EXPECT_NEAR(rhs(0), solution(0), tol); + EXPECT_NEAR(rhs(1), solution(1), tol); + EXPECT_NEAR(rhs(2), solution(2), tol); + EXPECT_NEAR(rhs(3), solution(3), tol); +} + +// ----------------------------------------------------------------------- +// Test 2: Symmetric positive-definite 4x4 system +// +// Matrix A (SPD): +// [ 4 0 2 0 ] +// [ 0 5 1 3 ] +// [ 2 1 6 2 ] +// [ 0 3 2 7 ] +// +// RHS b = [2, 4, 6, 8]^T +// ----------------------------------------------------------------------- +TEST(CooMumpsSolverTest, SymmetricPositiveDefinite4x4) +{ + using triplet = SparseMatrixCOO::triplet_type; + + std::vector entries = {{0, 0, 4.0}, {1, 1, 5.0}, {2, 0, 2.0}, {2, 1, 1.0}, {2, 2, 6.0}, {3, 1, 3.0}, + {3, 2, 2.0}, {3, 3, 7.0}, {0, 2, 2.0}, {1, 2, 1.0}, {1, 3, 3.0}, {2, 3, 2.0}}; + + SparseMatrixCOO mat(4, 4, entries); + mat.is_symmetric(true); + + CooMumpsSolver solver(std::move(mat)); + + Vector rhs("rhs", 4); + rhs(0) = 2.0; + rhs(1) = 4.0; + rhs(2) = 6.0; + rhs(3) = 8.0; + + solver.solve(rhs); + + Vector solution("solution", 4); + solution(0) = 9.0 / 46.0; + solution(1) = 3.0 / 23.0; + solution(2) = 14.0 / 23.0; + solution(3) = 21.0 / 23.0; + + const double tol = 1e-10; + EXPECT_NEAR(rhs(0), solution(0), tol); + EXPECT_NEAR(rhs(1), solution(1), tol); + EXPECT_NEAR(rhs(2), solution(2), tol); + EXPECT_NEAR(rhs(3), solution(3), tol); +} + +#endif // GMGPOLAR_USE_MUMPS \ No newline at end of file From b22e53e406d223756bad6edff9d026a39b4fa25f Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 13 Mar 2026 22:02:54 +0100 Subject: [PATCH 2/9] GG Anderson Formatting --- src/LinearAlgebra/Solvers/coo_mumps_solver.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp index daa213ad..58b75fef 100644 --- a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -82,8 +82,7 @@ void CooMumpsSolver::initialize() } if (INFOG(1) != 0) { - std::cerr << "MUMPS reported an error during analysis/factorization " - << "(INFOG(1) = " << INFOG(1) << ").\n"; + std::cerr << "MUMPS reported an error during analysis/factorization " << "(INFOG(1) = " << INFOG(1) << ").\n"; return; } } From 1a7393ecc74f954005047c5b7c27be3e21e0ad18 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 13 Mar 2026 22:04:32 +0100 Subject: [PATCH 3/9] GG Anderson Copy Copy Cat --- src/LinearAlgebra/Solvers/coo_mumps_solver.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp index 58b75fef..84ec9af1 100644 --- a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -80,11 +80,6 @@ void CooMumpsSolver::initialize() << "but negative pivots were encountered during factorization " << "(INFOG(12) = " << INFOG(12) << ").\n"; } - - if (INFOG(1) != 0) { - std::cerr << "MUMPS reported an error during analysis/factorization " << "(INFOG(1) = " << INFOG(1) << ").\n"; - return; - } } void CooMumpsSolver::finalize() From 015a1f9c1eb306f859560740d548cdb2a42ca24d Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 14 Mar 2026 21:43:17 +0100 Subject: [PATCH 4/9] Unlimited power! --- .../applySymmetryShift.inl | 18 +-- .../buildSolverMatrix.inl | 66 ++------- .../directSolverGive.h | 20 +-- .../directSolverGive.inl | 11 +- .../initializeMumps.inl | 128 ----------------- .../applySymmetryShift.inl | 18 +-- .../buildSolverMatrix.inl | 133 ++++-------------- .../directSolverTake.h | 22 +-- .../directSolverTake.inl | 11 +- .../initializeMumps.inl | 128 ----------------- .../buildSolverMatrix.inl | 38 ++--- .../directSolverGive.inl | 5 - .../directSolverGiveCustomLU.h | 1 - .../buildSolverMatrix.inl | 88 +++--------- .../directSolverTake.inl | 5 - .../directSolverTakeCustomLU.h | 1 - include/DirectSolver/directSolver.h | 4 +- 17 files changed, 104 insertions(+), 593 deletions(-) delete mode 100644 include/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.inl delete mode 100644 include/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.inl diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.inl index 8ffe9110..d703621e 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.inl @@ -131,25 +131,15 @@ void DirectSolver_COO_MUMPS_Give::applySymmetryShift(Vector::grid_; const bool DirBC_Interior = DirectSolver::DirBC_Interior_; - const int num_omp_threads = DirectSolver::num_omp_threads_; assert(std::ssize(x) == grid.numberOfNodes()); assert(grid.nr() >= 4); - #pragma omp parallel sections num_threads(num_omp_threads) - { - #pragma omp section - { - if (DirBC_Interior) { - applySymmetryShiftInnerBoundary(x); - } - } - - #pragma omp section - { - applySymmetryShiftOuterBoundary(x); - } + if (DirBC_Interior) { + applySymmetryShiftInnerBoundary(x); } + + applySymmetryShiftOuterBoundary(x); } #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.inl index 1c63b22d..4c7cb0c1 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.inl @@ -785,6 +785,7 @@ void DirectSolver_COO_MUMPS_Give::buildSolverMatrixCircleSection { const PolarGrid& grid = DirectSolver::grid_; const LevelCache& level_cache = DirectSolver::level_cache_; + const bool DirBC_Interior = DirectSolver::DirBC_Interior_; const double r = grid.radius(i_r); for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { @@ -795,8 +796,7 @@ void DirectSolver_COO_MUMPS_Give::buildSolverMatrixCircleSection level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build solver matrix at the current node - nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); + nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, coeff_beta); } } @@ -806,6 +806,7 @@ void DirectSolver_COO_MUMPS_Give::buildSolverMatrixRadialSection { const PolarGrid& grid = DirectSolver::grid_; const LevelCache& level_cache = DirectSolver::level_cache_; + const bool DirBC_Interior = DirectSolver::DirBC_Interior_; const double theta = grid.theta(i_theta); for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { @@ -816,32 +817,21 @@ void DirectSolver_COO_MUMPS_Give::buildSolverMatrixRadialSection level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build solver matrix at the current node - nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); + nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, coeff_beta); } } -// clang-format off - -/* ------------------------------------------------------------------------ */ -/* If the indexing is not smoother-based, please adjust the access patterns */ template SparseMatrixCOO DirectSolver_COO_MUMPS_Give::buildSolverMatrix() { - const PolarGrid& grid = DirectSolver::grid_; - const int num_omp_threads = DirectSolver::num_omp_threads_; + const PolarGrid& grid = DirectSolver::grid_; + const int num_omp_threads = DirectSolver::num_omp_threads_; const int n = grid.numberOfNodes(); const int nnz = getNonZeroCountSolverMatrix(); - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. SparseMatrixCOO solver_matrix(n, n, nnz); - solver_matrix.is_symmetric(false); - - #pragma omp parallel for num_threads(num_omp_threads) if (nnz > 10'000) - for (int i = 0; i < nnz; i++) { - solver_matrix.value(i) = 0.0; - } + solver_matrix.is_symmetric(true); if (num_omp_threads == 1) { /* Single-threaded execution */ @@ -858,25 +848,25 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Give::buildSolver const int additional_radial_tasks = grid.ntheta() % 3; const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; - #pragma omp parallel num_threads(num_omp_threads) + #pragma omp parallel num_threads(num_omp_threads) { - #pragma omp for + #pragma omp for for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { int i_r = grid.numberSmootherCircles() - circle_task - 1; buildSolverMatrixCircleSection(i_r, solver_matrix); } - #pragma omp for + #pragma omp for for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { int i_r = grid.numberSmootherCircles() - circle_task - 1; buildSolverMatrixCircleSection(i_r, solver_matrix); } - #pragma omp for nowait + #pragma omp for nowait for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { int i_r = grid.numberSmootherCircles() - circle_task - 1; buildSolverMatrixCircleSection(i_r, solver_matrix); } - #pragma omp for + #pragma omp for for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 0) { int i_theta = radial_task + additional_radial_tasks; @@ -892,7 +882,7 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Give::buildSolver } } } - #pragma omp for + #pragma omp for for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 1) { int i_theta = radial_task + additional_radial_tasks; @@ -911,7 +901,7 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Give::buildSolver } } } - #pragma omp for + #pragma omp for for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { int i_theta = radial_task + additional_radial_tasks; buildSolverMatrixRadialSection(i_theta, solver_matrix); @@ -919,33 +909,7 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Give::buildSolver } } - /* Mumps: In the case of symmetric matrices, only half of the matrix should be provided. */ - /* Speeds up factorization time. */ - const bool construct_symmetric = true; - - if (!construct_symmetric) { - return solver_matrix; - } - - /* Only store the upper tridiagonal entries of the symmetric solver_matrix */ - const int symmetric_nnz = nnz - (nnz - n) / 2; - SparseMatrixCOO symmetric_solver_matrix(n, n, symmetric_nnz); - symmetric_solver_matrix.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < nnz; nz_index++) { - const int row = solver_matrix.row_index(nz_index); - const int col = solver_matrix.col_index(nz_index); - if (row <= col) { - symmetric_solver_matrix.row_index(current_nz) = row; - symmetric_solver_matrix.col_index(current_nz) = col; - symmetric_solver_matrix.value(current_nz) = std::move(solver_matrix.value(nz_index)); - current_nz++; - } - } - - return symmetric_solver_matrix; + return solver_matrix; } -// clang-format on #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h index 0dbeae4e..43d6a0c4 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h @@ -13,14 +13,12 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - ~DirectSolver_COO_MUMPS_Give() override; // Note: The rhs (right-hand side) vector gets overwritten during the solution process. void solveInPlace(Vector solution) override; private: - // Solver matrix and MUMPS solver structure - SparseMatrixCOO solver_matrix_; - DMUMPS_STRUC_C mumps_solver_; + // The stencil definitions must be defined before the declaration of the mumps_solver, + // since the mumps solver will be build in the member initializer of the DirectSolver class. // clang-format off const Stencil stencil_interior_ = { @@ -50,15 +48,14 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver }; // clang-format on + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver mumps_solver_; + // Constructs a symmetric solver matrix. SparseMatrixCOO buildSolverMatrix(); void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); void buildSolverMatrixRadialSection(const int i_theta, SparseMatrixCOO& solver_matrix); - // Initializes the MUMPS solver with the specified matrix. - // Converts to 1-based indexing. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Adjusts the right-hand side vector for symmetry corrections. // This modifies the system from // A * solution = rhs @@ -70,12 +67,6 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver void applySymmetryShiftInnerBoundary(Vector x) const; void applySymmetryShiftOuterBoundary(Vector x) const; - // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - void solveWithMumps(Vector solution); - - // Finalizes the MUMPS solver, releasing any allocated resources. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); - // Returns the total number of non-zero elements in the solver matrix. int getNonZeroCountSolverMatrix() const; @@ -93,7 +84,6 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver #include "applySymmetryShift.inl" #include "buildSolverMatrix.inl" #include "directSolverGive.inl" - #include "initializeMumps.inl" #include "matrixStencil.inl" #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl index 7d3c2a48..5d42128b 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl @@ -8,9 +8,8 @@ DirectSolver_COO_MUMPS_Give::DirectSolver_COO_MUMPS_Give( const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , mumps_solver_(buildSolverMatrix()) { - solver_matrix_ = buildSolverMatrix(); - initializeMumpsSolver(mumps_solver_, solver_matrix_); } template @@ -23,13 +22,7 @@ void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector so // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - solveWithMumps(solution); -} - -template -DirectSolver_COO_MUMPS_Give::~DirectSolver_COO_MUMPS_Give() -{ - finalizeMumpsSolver(mumps_solver_); + mumps_solver_.solve(solution); } #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.inl deleted file mode 100644 index ea6308a8..00000000 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.inl +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once - -#ifdef GMGPOLAR_USE_MUMPS - -template -void DirectSolver_COO_MUMPS_Give::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. - ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout - << "Warning: DirectSolver matrix is not positive definite: Negative pivots in the factorization phase." - << std::endl; - } -} - -template -void DirectSolver_COO_MUMPS_Give::solveWithMumps(Vector result_rhs) -{ - mumps_solver_.job = JOB_COMPUTE_SOLUTION; - mumps_solver_.nrhs = 1; - mumps_solver_.nz_rhs = result_rhs.size(); - mumps_solver_.rhs = result_rhs.data(); - mumps_solver_.lrhs = result_rhs.size(); - dmumps_c(&mumps_solver_); - if (mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the direct system: " << mumps_solver_.info[0] << std::endl; - } -} - -template -void DirectSolver_COO_MUMPS_Give::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.inl index 21f5172f..5d1e9aba 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.inl @@ -91,25 +91,15 @@ void DirectSolver_COO_MUMPS_Take::applySymmetryShift(Vector::grid_; const bool DirBC_Interior = DirectSolver::DirBC_Interior_; - const int num_omp_threads = DirectSolver::num_omp_threads_; assert(std::ssize(x) == grid.numberOfNodes()); assert(grid.nr() >= 4); - #pragma omp parallel sections num_threads(num_omp_threads) - { - #pragma omp section - { - if (DirBC_Interior) { - applySymmetryShiftInnerBoundary(x); - } - } - - #pragma omp section - { - applySymmetryShiftOuterBoundary(x); - } + if (DirBC_Interior) { + applySymmetryShiftInnerBoundary(x); } + + applySymmetryShiftOuterBoundary(x); } #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl index 72cf4876..048302a5 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl @@ -459,120 +459,49 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake( } template -void DirectSolver_COO_MUMPS_Take::buildSolverMatrixCircleSection(const int i_r, - SparseMatrixCOO& solver_matrix) -{ - const PolarGrid& grid = DirectSolver::grid_; - const LevelCache& level_cache = DirectSolver::level_cache_; - - assert(level_cache.cacheDensityProfileCoefficients()); - assert(level_cache.cacheDomainGeometry()); - - ConstVector arr = level_cache.arr(); - ConstVector att = level_cache.att(); - ConstVector art = level_cache.art(); - ConstVector detDF = level_cache.detDF(); - ConstVector coeff_beta = level_cache.coeff_beta(); - - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - // Build solver matrix at the current node - nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); - } -} - -template -void DirectSolver_COO_MUMPS_Take::buildSolverMatrixRadialSection(const int i_theta, - SparseMatrixCOO& solver_matrix) +SparseMatrixCOO DirectSolver_COO_MUMPS_Take::buildSolverMatrix() { const PolarGrid& grid = DirectSolver::grid_; const LevelCache& level_cache = DirectSolver::level_cache_; + const int num_omp_threads = DirectSolver::num_omp_threads_; + const bool DirBC_Interior = DirectSolver::DirBC_Interior_; - assert(level_cache.cacheDensityProfileCoefficients()); - assert(level_cache.cacheDomainGeometry()); - - ConstVector arr = level_cache.arr(); - ConstVector att = level_cache.att(); - ConstVector art = level_cache.art(); - ConstVector detDF = level_cache.detDF(); - ConstVector coeff_beta = level_cache.coeff_beta(); - - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - // Build solver matrix at the current node - nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); - } -} - -// clang-format off - -/* ------------------------------------------------------------------------ */ -/* If the indexing is not smoother-based, please adjust the access patterns */ -template -SparseMatrixCOO DirectSolver_COO_MUMPS_Take::buildSolverMatrix() -{ - const PolarGrid& grid = DirectSolver::grid_; - const int num_omp_threads = DirectSolver::num_omp_threads_; - - const int n = grid.numberOfNodes(); + const int n = grid_.numberOfNodes(); const int nnz = getNonZeroCountSolverMatrix(); - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. SparseMatrixCOO solver_matrix(n, n, nnz); - solver_matrix.is_symmetric(false); - - if (num_omp_threads == 1) { - /* Single-threaded execution */ - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - buildSolverMatrixCircleSection(i_r, solver_matrix); - } - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - buildSolverMatrixRadialSection(i_theta, solver_matrix); - } - } - else { - /* Multi-threaded execution */ - #pragma omp parallel num_threads(num_omp_threads) - { - /* Circle Section */ - #pragma omp for nowait - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - buildSolverMatrixCircleSection(i_r, solver_matrix); - } - /* Radial Section */ - #pragma omp for nowait - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - buildSolverMatrixRadialSection(i_theta, solver_matrix); + solver_matrix.is_symmetric(true); + + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + + #pragma omp parallel num_threads(num_omp_threads_) + { + /* Circle Section */ + #pragma omp for nowait + for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, + coeff_beta); } } - } - - /* Mumps: In the case of symmetric matrices, only half of the matrix should be provided. */ - const bool construct_symmetric = true; - - if (!construct_symmetric) { - return solver_matrix; - } - - /* Only store the upper tridiagonal entries of the symmetric solver_matrix */ - const int symmetric_nnz = nnz - (nnz - n) / 2; - SparseMatrixCOO symmetric_solver_matrix(n, n, symmetric_nnz); - symmetric_solver_matrix.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < nnz; nz_index++) { - const int row = solver_matrix.row_index(nz_index); - const int col = solver_matrix.col_index(nz_index); - if (row <= col) { - symmetric_solver_matrix.row_index(current_nz) = row; - symmetric_solver_matrix.col_index(current_nz) = col; - symmetric_solver_matrix.value(current_nz) = std::move(solver_matrix.value(nz_index)); - current_nz++; + /* Radial Section */ + #pragma omp for nowait + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior solver_matrix, arr, att, art, detDF, + coeff_beta); + } } } - return symmetric_solver_matrix; + return solver_matrix; } -// clang-format on #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h index 8f0d4e98..a0a76bc3 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h @@ -13,14 +13,12 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - ~DirectSolver_COO_MUMPS_Take() override; // Note: The rhs (right-hand side) vector gets overwritten during the solution process. void solveInPlace(Vector solution) override; private: - // Solver matrix and MUMPS solver structure - SparseMatrixCOO solver_matrix_; - DMUMPS_STRUC_C mumps_solver_; + // The stencil definitions must be defined before the declaration of the mumps_solver, + // since the mumps solver will be build in the member initializer of the DirectSolver class. // clang-format off const Stencil stencil_interior_ = { @@ -50,14 +48,11 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver }; // clang-format on + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver mumps_solver_; + // Constructs a symmetric solver matrix. SparseMatrixCOO buildSolverMatrix(); - void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); - void buildSolverMatrixRadialSection(const int i_theta, SparseMatrixCOO& solver_matrix); - - // Initializes the MUMPS solver with the specified matrix. - // Converts to 1-based indexing. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); // Adjusts the right-hand side vector for symmetry corrections. // This modifies the system from @@ -70,12 +65,6 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver void applySymmetryShiftInnerBoundary(Vector x) const; void applySymmetryShiftOuterBoundary(Vector x) const; - // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - void solveWithMumps(Vector solution); - - // Finalizes the MUMPS solver, releasing any allocated resources. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); - // Returns the total number of non-zero elements in the solver matrix. int getNonZeroCountSolverMatrix() const; @@ -94,7 +83,6 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver #include "applySymmetryShift.inl" #include "buildSolverMatrix.inl" #include "directSolverTake.inl" - #include "initializeMumps.inl" #include "matrixStencil.inl" #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl index d80fb054..1e6aab16 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl @@ -8,9 +8,8 @@ DirectSolver_COO_MUMPS_Take::DirectSolver_COO_MUMPS_Take( const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , mumps_solver_(buildSolverMatrix()) { - solver_matrix_ = buildSolverMatrix(); - initializeMumpsSolver(mumps_solver_, solver_matrix_); } template @@ -23,13 +22,7 @@ void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector so // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - solveWithMumps(solution); -} - -template -DirectSolver_COO_MUMPS_Take::~DirectSolver_COO_MUMPS_Take() -{ - finalizeMumpsSolver(mumps_solver_); + mumps_solver_.solve(solution); } #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.inl deleted file mode 100644 index 85934979..00000000 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.inl +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once - -#ifdef GMGPOLAR_USE_MUMPS - -template -void DirectSolver_COO_MUMPS_Take::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. - ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout - << "Warning: DirectSolver matrix is not positive definite: Negative pivots in the factorization phase." - << std::endl; - } -} - -template -void DirectSolver_COO_MUMPS_Take::solveWithMumps(Vector result_rhs) -{ - mumps_solver_.job = JOB_COMPUTE_SOLUTION; - mumps_solver_.nrhs = 1; - mumps_solver_.nz_rhs = result_rhs.size(); - mumps_solver_.rhs = result_rhs.data(); - mumps_solver_.lrhs = result_rhs.size(); - dmumps_c(&mumps_solver_); - if (mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the direct system: " << mumps_solver_.info[0] << std::endl; - } -} - -template -void DirectSolver_COO_MUMPS_Take::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl index e2eb1237..dc1511a7 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl @@ -748,6 +748,7 @@ void DirectSolver_CSR_LU_Give::buildSolverMatrixCircleSection(co { const PolarGrid& grid = DirectSolver::grid_; const LevelCache& level_cache = DirectSolver::level_cache_; + const bool DirBC_Interior = DirectSolver::DirBC_Interior_; const double r = grid.radius(i_r); for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { @@ -758,8 +759,7 @@ void DirectSolver_CSR_LU_Give::buildSolverMatrixCircleSection(co level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build solver matrix at the current node - nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); + nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, coeff_beta); } } @@ -769,6 +769,7 @@ void DirectSolver_CSR_LU_Give::buildSolverMatrixRadialSection(co { const PolarGrid& grid = DirectSolver::grid_; const LevelCache& level_cache = DirectSolver::level_cache_; + const bool DirBC_Interior = DirectSolver::DirBC_Interior_; const double theta = grid.theta(i_theta); for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { @@ -779,20 +780,15 @@ void DirectSolver_CSR_LU_Give::buildSolverMatrixRadialSection(co level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build solver matrix at the current node - nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); + nodeBuildSolverMatrixGive(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, coeff_beta); } } -// clang-format off - -/* ------------------------------------------------------------------------ */ -/* If the indexing is not smoother-based, please adjust the access patterns */ template SparseMatrixCSR DirectSolver_CSR_LU_Give::buildSolverMatrix() { - const PolarGrid& grid = DirectSolver::grid_; - const int num_omp_threads = DirectSolver::num_omp_threads_; + const PolarGrid& grid = DirectSolver::grid_; + const int num_omp_threads = DirectSolver::num_omp_threads_; const int n = grid.numberOfNodes(); @@ -802,13 +798,6 @@ SparseMatrixCSR DirectSolver_CSR_LU_Give::buildSolverMat SparseMatrixCSR solver_matrix(n, n, nnz_per_row); - const int nnz = solver_matrix.non_zero_size(); - - #pragma omp parallel for num_threads(num_omp_threads) if (nnz > 10'000) - for (int i = 0; i < nnz; i++) { - solver_matrix.values_data()[i] = 0.0; - } - if (num_omp_threads == 1) { /* Single-threaded execution */ for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { @@ -824,25 +813,25 @@ SparseMatrixCSR DirectSolver_CSR_LU_Give::buildSolverMat const int additional_radial_tasks = grid.ntheta() % 3; const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; - #pragma omp parallel num_threads(num_omp_threads) +#pragma omp parallel num_threads(num_omp_threads) { - #pragma omp for +#pragma omp for for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { int i_r = grid.numberSmootherCircles() - circle_task - 1; buildSolverMatrixCircleSection(i_r, solver_matrix); } - #pragma omp for +#pragma omp for for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { int i_r = grid.numberSmootherCircles() - circle_task - 1; buildSolverMatrixCircleSection(i_r, solver_matrix); } - #pragma omp for nowait +#pragma omp for nowait for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { int i_r = grid.numberSmootherCircles() - circle_task - 1; buildSolverMatrixCircleSection(i_r, solver_matrix); } - #pragma omp for +#pragma omp for for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 0) { int i_theta = radial_task + additional_radial_tasks; @@ -858,7 +847,7 @@ SparseMatrixCSR DirectSolver_CSR_LU_Give::buildSolverMat } } } - #pragma omp for +#pragma omp for for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 1) { int i_theta = radial_task + additional_radial_tasks; @@ -877,7 +866,7 @@ SparseMatrixCSR DirectSolver_CSR_LU_Give::buildSolverMat } } } - #pragma omp for +#pragma omp for for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { int i_theta = radial_task + additional_radial_tasks; buildSolverMatrixRadialSection(i_theta, solver_matrix); @@ -887,4 +876,3 @@ SparseMatrixCSR DirectSolver_CSR_LU_Give::buildSolverMat return solver_matrix; } -// clang-format on diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.inl b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.inl index ea32ebbd..d26cab1f 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.inl @@ -16,8 +16,3 @@ void DirectSolver_CSR_LU_Give::solveInPlace(Vector solut { lu_solver_.solveInPlace(solution); } - -template -DirectSolver_CSR_LU_Give::~DirectSolver_CSR_LU_Give() -{ -} diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h index b1503ce9..0c069860 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h +++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h @@ -11,7 +11,6 @@ class DirectSolver_CSR_LU_Give : public DirectSolver const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - ~DirectSolver_CSR_LU_Give() override; // Note: The rhs (right-hand side) vector gets overwritten with the solution. void solveInPlace(Vector solution) override; diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl index 6c914238..2082cc37 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl @@ -247,34 +247,20 @@ void DirectSolver_CSR_LU_Take::nodeBuildSolverMatrixTake( } template -void DirectSolver_CSR_LU_Take::buildSolverMatrixCircleSection(const int i_r, - SparseMatrixCSR& solver_matrix) +SparseMatrixCSR DirectSolver_CSR_LU_Take::buildSolverMatrix() { const PolarGrid& grid = DirectSolver::grid_; const LevelCache& level_cache = DirectSolver::level_cache_; + const int num_omp_threads = DirectSolver::num_omp_threads_; + const bool DirBC_Interior = DirectSolver::DirBC_Interior_; - assert(level_cache.cacheDensityProfileCoefficients()); - assert(level_cache.cacheDomainGeometry()); - - ConstVector arr = level_cache.arr(); - ConstVector att = level_cache.att(); - ConstVector art = level_cache.art(); - ConstVector detDF = level_cache.detDF(); - ConstVector coeff_beta = level_cache.coeff_beta(); + const int n = grid.numberOfNodes(); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - // Build solver matrix at the current node - nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); - } -} + std::function nnz_per_row = [&](int global_index) { + return getStencilSize(global_index); + }; -template -void DirectSolver_CSR_LU_Take::buildSolverMatrixRadialSection(const int i_theta, - SparseMatrixCSR& solver_matrix) -{ - const PolarGrid& grid = DirectSolver::grid_; - const LevelCache& level_cache = DirectSolver::level_cache_; + SparseMatrixCSR solver_matrix(n, n, nnz_per_row); assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -285,57 +271,25 @@ void DirectSolver_CSR_LU_Take::buildSolverMatrixRadialSection(co ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - // Build solver matrix at the current node - nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirectSolver::DirBC_Interior_, solver_matrix, arr, - att, art, detDF, coeff_beta); - } -} - -// clang-format off - -/* ------------------------------------------------------------------------ */ -/* If the indexing is not smoother-based, please adjust the access patterns */ -template -SparseMatrixCSR DirectSolver_CSR_LU_Take::buildSolverMatrix() -{ - const PolarGrid& grid = DirectSolver::grid_; - const int num_omp_threads = DirectSolver::num_omp_threads_; - - const int n = grid.numberOfNodes(); - - std::function nnz_per_row = [&](int global_index) { - return getStencilSize(global_index); - }; - - SparseMatrixCSR solver_matrix(n, n, nnz_per_row); - - if (num_omp_threads == 1) { - /* Single-threaded execution */ +#pragma omp parallel num_threads(num_omp_threads) + { + /* Circle Section */ +#pragma omp for nowait for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - buildSolverMatrixCircleSection(i_r, solver_matrix); + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, + coeff_beta); + } } + /* Radial Section */ +#pragma omp for nowait for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - buildSolverMatrixRadialSection(i_theta, solver_matrix); - } - } - else { - /* Multi-threaded execution */ - #pragma omp parallel num_threads(num_omp_threads) - { - /* Circle Section */ - #pragma omp for nowait - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - buildSolverMatrixCircleSection(i_r, solver_matrix); - } - /* Radial Section */ - #pragma omp for nowait - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - buildSolverMatrixRadialSection(i_theta, solver_matrix); + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, + coeff_beta); } } } return solver_matrix; } -// clang-format on diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.inl b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.inl index 2e823182..ac56f829 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.inl @@ -16,8 +16,3 @@ void DirectSolver_CSR_LU_Take::solveInPlace(Vector solut { lu_solver_.solveInPlace(solution); } - -template -DirectSolver_CSR_LU_Take::~DirectSolver_CSR_LU_Take() -{ -} diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h index b9a955ac..ac770b92 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h +++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h @@ -11,7 +11,6 @@ class DirectSolver_CSR_LU_Take : public DirectSolver const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - ~DirectSolver_CSR_LU_Take() override; // Note: The rhs (right-hand side) vector gets overwritten with the solution. void solveInPlace(Vector solution) override; diff --git a/include/DirectSolver/directSolver.h b/include/DirectSolver/directSolver.h index d49c8fb2..9bcb1087 100644 --- a/include/DirectSolver/directSolver.h +++ b/include/DirectSolver/directSolver.h @@ -5,14 +5,13 @@ #include #include "../InputFunctions/domainGeometry.h" +#include "../InputFunctions/densityProfileCoefficients.h" template class LevelCache; - template class Level; -#include "../InputFunctions/densityProfileCoefficients.h" #include "../Level/level.h" #include "../PolarGrid/polargrid.h" #include "../Definitions/global_definitions.h" @@ -21,6 +20,7 @@ class Level; #include "../LinearAlgebra/Matrix/coo_matrix.h" #include "../LinearAlgebra/Matrix/csr_matrix.h" #include "../LinearAlgebra/Solvers/csr_lu_solver.h" +#include "../LinearAlgebra/Solvers/coo_mumps_solver.h" #include "../Stencil/stencil.h" #ifdef GMGPOLAR_USE_MUMPS From b5c37ad50020c6be9555c9a451b9560304380a35 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 14 Mar 2026 21:47:05 +0100 Subject: [PATCH 5/9] Yeet --- .../DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h index ac770b92..32cf4c5d 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h +++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h @@ -48,8 +48,6 @@ class DirectSolver_CSR_LU_Take : public DirectSolver // clang-format on SparseMatrixCSR buildSolverMatrix(); - void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCSR& solver_matrix); - void buildSolverMatrixRadialSection(const int i_theta, SparseMatrixCSR& solver_matrix); // Returns the total number of non-zero elements in the solver matrix. int getNonZeroCountSolverMatrix() const; From b797925a3075f87bb218c2d7306f8278b6f50b3c Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 14 Mar 2026 22:09:32 +0100 Subject: [PATCH 6/9] Resolve compile error --- .../buildSolverMatrix.inl | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl index 048302a5..a8832d22 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl @@ -466,7 +466,7 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Take::buildSolver const int num_omp_threads = DirectSolver::num_omp_threads_; const bool DirBC_Interior = DirectSolver::DirBC_Interior_; - const int n = grid_.numberOfNodes(); + const int n = grid.numberOfNodes(); const int nnz = getNonZeroCountSolverMatrix(); SparseMatrixCOO solver_matrix(n, n, nnz); @@ -475,27 +475,27 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Take::buildSolver assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); - ConstVector arr = level_cache_.arr(); - ConstVector att = level_cache_.att(); - ConstVector art = level_cache_.art(); - ConstVector detDF = level_cache_.detDF(); - ConstVector coeff_beta = level_cache_.coeff_beta(); + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); - #pragma omp parallel num_threads(num_omp_threads_) + #pragma omp parallel num_threads(num_omp_threads) { /* Circle Section */ #pragma omp for nowait for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, coeff_beta); } } /* Radial Section */ #pragma omp for nowait - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior solver_matrix, arr, att, art, detDF, + nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, coeff_beta); } } From 931305a5625ae43a238822a141748febac136a7c Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Sat, 14 Mar 2026 22:21:02 +0100 Subject: [PATCH 7/9] Update buildSolverMatrix.inl --- .../DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl index a8832d22..6b52e876 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl @@ -472,8 +472,8 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Take::buildSolver SparseMatrixCOO solver_matrix(n, n, nnz); solver_matrix.is_symmetric(true); - assert(level_cache_.cacheDensityProfileCoefficients()); - assert(level_cache_.cacheDomainGeometry()); + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); ConstVector arr = level_cache.arr(); ConstVector att = level_cache.att(); @@ -485,7 +485,7 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Take::buildSolver { /* Circle Section */ #pragma omp for nowait - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { + for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, coeff_beta); From aa7c21d8d52c46823ad6beefa86be92255666735 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sun, 15 Mar 2026 21:54:09 +0100 Subject: [PATCH 8/9] Use COO Mumps solver --- .../Smoother/SmootherTake/applyAscOrtho.inl | 94 +++- .../SmootherTake/buildInnerBoundaryAsc.inl | 172 ++++++ include/Smoother/SmootherTake/buildMatrix.inl | 498 ------------------ .../SmootherTake/buildTridiagonalAsc.inl | 340 ++++++++++++ .../Smoother/SmootherTake/initializeMumps.inl | 114 ---- .../Smoother/SmootherTake/matrixStencil.inl | 9 +- include/Smoother/SmootherTake/smootherTake.h | 124 ++--- .../Smoother/SmootherTake/smootherTake.inl | 58 +- .../Smoother/SmootherTake/solveAscSystem.inl | 27 +- include/Smoother/smoother.h | 3 +- 10 files changed, 684 insertions(+), 755 deletions(-) create mode 100644 include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl delete mode 100644 include/Smoother/SmootherTake/buildMatrix.inl create mode 100644 include/Smoother/SmootherTake/buildTridiagonalAsc.inl delete mode 100644 include/Smoother/SmootherTake/initializeMumps.inl diff --git a/include/Smoother/SmootherTake/applyAscOrtho.inl b/include/Smoother/SmootherTake/applyAscOrtho.inl index 26ffc1bc..889a45ed 100644 --- a/include/Smoother/SmootherTake/applyAscOrtho.inl +++ b/include/Smoother/SmootherTake/applyAscOrtho.inl @@ -215,15 +215,47 @@ static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const Polar } // namespace smoother_take template -void SmootherTake::applyAscOrthoCircleSection(int i_r, ConstVector x, ConstVector rhs, - Vector temp) +void SmootherTake::applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, + Vector temp) { using smoother_take::nodeApplyAscOrthoCircleTake; const PolarGrid& grid = Smoother::grid_; const LevelCache& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int num_omp_threads = Smoother::num_omp_threads_; - assert(i_r >= 0 && i_r < grid.numberSmootherCircles()); + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); + + /* The outer most circle next to the radial section is defined to be black. */ + const int start_black_circles = (grid.numberSmootherCircles() % 2 == 0) ? 1 : 0; + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} + +template +void SmootherTake::applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, + Vector temp) +{ + using smoother_take::nodeApplyAscOrthoCircleTake; + + const PolarGrid& grid = Smoother::grid_; + const LevelCache& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int num_omp_threads = Smoother::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -234,22 +266,57 @@ void SmootherTake::applyAscOrthoCircleSection(int i_r, ConstVect ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, Smoother::DirBC_Interior_, x, rhs, temp, arr, - att, art, detDF, coeff_beta); + /* The outer most circle next to the radial section is defined to be black. */ + const int start_white_circles = (grid.numberSmootherCircles() % 2 == 0) ? 0 : 1; + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } } template -void SmootherTake::applyAscOrthoRadialSection(int i_theta, ConstVector x, - ConstVector rhs, Vector temp) +void SmootherTake::applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, + Vector temp) { using smoother_take::nodeApplyAscOrthoRadialTake; const PolarGrid& grid = Smoother::grid_; const LevelCache& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int num_omp_threads = Smoother::num_omp_threads_; - assert(i_theta >= 0 && i_theta < grid.ntheta()); + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) { + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} + +template +void SmootherTake::applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, + Vector temp) +{ + using smoother_take::nodeApplyAscOrthoRadialTake; + + const PolarGrid& grid = Smoother::grid_; + const LevelCache& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int num_omp_threads = Smoother::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -260,8 +327,11 @@ void SmootherTake::applyAscOrthoRadialSection(int i_theta, Const ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, Smoother::DirBC_Interior_, x, rhs, temp, arr, - att, art, detDF, coeff_beta); +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) { + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } } diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl new file mode 100644 index 00000000..48e8a23d --- /dev/null +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -0,0 +1,172 @@ +#pragma once + +namespace smoother_take +{ + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) = value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) = value; +} +#endif + +} // namespace smoother_take + +template +void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( + int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) +{ + using smoother_take::update_CSR_COO_MatrixElement; + + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + /* ------------------------------------------ */ + /* Circle Section: Node in the inner boundary */ + /* ------------------------------------------ */ + const int i_r = 0; + + int ptr, offset; + int row, column; + double value; + + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + const int center_index = i_theta; + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). + // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. + const double h1 = 2.0 * grid.radius(0); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); + + const int left = grid.index(i_r, i_theta_AcrossOrigin); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = center_value; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = left_value; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Bottom]; + column = bottom_index; + value = bottom_value; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Top]; + column = top_index; + value = top_value; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } +} + +template +typename SmootherTake::MatrixType SmootherTake::buildInteriorBoundarySolverMatrix() +{ + const PolarGrid& grid = Smoother::grid_; + const LevelCache& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int num_omp_threads = Smoother::num_omp_threads_; + + const int i_r = 0; + const int ntheta = grid.ntheta(); + +#ifdef GMGPOLAR_USE_MUMPS + const int nnz = getNonZeroCountCircleAsc(i_r); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(true); +#else + // The stencils size for the inner boundary matrix is either 1 (Dirichlet BC) or 4 (across-origin discretization). + std::function nnz_per_row = [&](int i_theta) { + return DirBC_Interior ? 1 : 4; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, + art, detDF, coeff_beta); + } + + return inner_boundary_solver_matrix; +} diff --git a/include/Smoother/SmootherTake/buildMatrix.inl b/include/Smoother/SmootherTake/buildMatrix.inl deleted file mode 100644 index 625ef2ba..00000000 --- a/include/Smoother/SmootherTake/buildMatrix.inl +++ /dev/null @@ -1,498 +0,0 @@ -#pragma once - -namespace smoother_take -{ - -/* Tridiagonal matrices */ -static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, - double value) -{ - if (row == column) - solver.main_diagonal(batch, row) = value; - else if (row == column - 1) - solver.sub_diagonal(batch, row) = value; - else if (row == 0 && column == solver.matrixDimension() - 1) - solver.cyclic_corner(batch) = value; -} - -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) = val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) = val; -} -#endif - -} // namespace smoother_take - -template -void SmootherTake::nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, - ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta) -{ - using smoother_take::updateCOOCSRMatrixElement; - using smoother_take::updateMatrixElement; - - assert(i_r >= 0 && i_r < grid.nr()); - assert(i_theta >= 0 && i_theta < grid.ntheta()); - - const int numberSmootherCircles = grid.numberSmootherCircles(); - const int lengthSmootherRadial = grid.lengthSmootherRadial(); - - assert(numberSmootherCircles >= 2); - assert(lengthSmootherRadial >= 3); - - int row, column; - double value; - /* ------------------------------------------ */ - /* Node in the interior of the Circle Section */ - /* ------------------------------------------ */ - if (i_r > 0 && i_r < numberSmootherCircles) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = circle_tridiagonal_solver; - int batch = i_r; - - const int center_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Bottom */ - row = center_index; - column = bottom_index; - value = -coeff3 * (att[center] + att[bottom]); - updateMatrixElement(solver, batch, row, column, value); - - /* Top */ - row = center_index; - column = top_index; - value = -coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - } - /* ------------------------------------------ */ - /* Node in the interior of the Radial Section */ - /* ------------------------------------------ */ - else if (i_r > numberSmootherCircles && i_r < grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int left_index = i_r - numberSmootherCircles - 1; - const int right_index = i_r - numberSmootherCircles + 1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Left */ - row = center_index; - column = left_index; - value = -coeff1 * (arr[center] + arr[left]); - updateMatrixElement(solver, batch, row, column, value); - - /* Right */ - row = center_index; - column = right_index; - value = -coeff2 * (arr[center] + arr[right]); - updateMatrixElement(solver, batch, row, column, value); - } - /* ------------------------------------------ */ - /* Circle Section: Node in the inner boundary */ - /* ------------------------------------------ */ - else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - int ptr, offset; - int row, col; - double val; - if (DirBC_Interior) { - auto& matrix = inner_boundary_circle_matrix; - const int center_index = i_theta; - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. - const double h1 = 2.0 * grid.radius(0); - const double h2 = grid.radialSpacing(i_r); - const double k1 = grid.angularSpacing(i_theta - 1); - const double k2 = grid.angularSpacing(i_theta); - - const double coeff1 = 0.5 * (k1 + k2) / h1; - const double coeff2 = 0.5 * (k1 + k2) / h2; - const double coeff3 = 0.5 * (h1 + h2) / k1; - const double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); - - const int left = grid.index(i_r, i_theta_AcrossOrigin); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& matrix = inner_boundary_circle_matrix; - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - - const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - const double left_value = -coeff1 * (arr[center] + arr[left]); - const double bottom_value = -coeff3 * (att[center] + att[bottom]); - const double top_value = -coeff4 * (att[center] + att[top]); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = center_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = left_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Bottom]; - col = bottom_index; - val = bottom_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Top]; - col = top_index; - val = top_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - } - /* --------------------------------------------- */ - /* Radial Section: Node next to circular section */ - /* --------------------------------------------- */ - else if (i_r == numberSmootherCircles) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int right_index = i_r - numberSmootherCircles + 1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Right */ - row = center_index; - column = right_index; - value = -coeff2 * (arr[center] + arr[right]); - updateMatrixElement(solver, batch, row, column, value); - } - /* ------------------------------------------- */ - /* Radial Section: Node next to outer boundary */ - /* ------------------------------------------- */ - else if (i_r == grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int left_index = i_r - numberSmootherCircles - 1; - const int right_index = i_r - numberSmootherCircles + 1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Left */ - row = center_index; - column = left_index; - value = -coeff1 * (arr[center] + arr[left]); - updateMatrixElement(solver, batch, row, column, value); - - /* Right: NOT INCLUDED! */ - row = center_index; - column = right_index; - value = 0.0; - updateMatrixElement(solver, batch, row, column, value); - } - /* ------------------------------------------ */ - /* Radial Section: Node on the outer boundary */ - /* ------------------------------------------ */ - else if (i_r == grid.nr() - 1) { - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int left_index = i_r - numberSmootherCircles - 1; - - /* Fill matrix row of (i,j) */ - row = center_index; - column = center_index; - value = 1.0; - updateMatrixElement(solver, batch, row, column, value); - - /* Left: NOT INCLUDED */ - row = center_index; - column = left_index; - value = 0.0; - updateMatrixElement(solver, batch, row, column, value); - } -} - -template -void SmootherTake::buildAscCircleSection(int i_r) -{ - using smoother_take::updateCOOCSRMatrixElement; - using smoother_take::updateMatrixElement; - - const PolarGrid& grid = Smoother::grid_; - const LevelCache& level_cache = Smoother::level_cache_; - - assert(level_cache.cacheDensityProfileCoefficients()); - assert(level_cache.cacheDomainGeometry()); - - ConstVector arr = level_cache.arr(); - ConstVector att = level_cache.att(); - ConstVector art = level_cache.art(); - ConstVector detDF = level_cache.detDF(); - ConstVector coeff_beta = level_cache.coeff_beta(); - - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, i_theta, grid, Smoother::DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } -} - -template -void SmootherTake::buildAscRadialSection(int i_theta) -{ - using smoother_take::updateCOOCSRMatrixElement; - using smoother_take::updateMatrixElement; - - const PolarGrid& grid = Smoother::grid_; - const LevelCache& level_cache = Smoother::level_cache_; - - assert(level_cache.cacheDensityProfileCoefficients()); - assert(level_cache.cacheDomainGeometry()); - - ConstVector arr = level_cache.arr(); - ConstVector att = level_cache.att(); - ConstVector art = level_cache.art(); - ConstVector detDF = level_cache.detDF(); - ConstVector coeff_beta = level_cache.coeff_beta(); - - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, i_theta, grid, Smoother::DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } -} - -template -void SmootherTake::buildAscMatrices() -{ - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - - const PolarGrid& grid = Smoother::grid_; - const bool DirBC_Interior = Smoother::DirBC_Interior_; - const int num_omp_threads = Smoother::num_omp_threads_; - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - return DirBC_Interior ? 1 : 4; - }; - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ -#pragma omp parallel num_threads(num_omp_threads) - { -#pragma omp for nowait - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - buildAscCircleSection(i_r); - } - -#pragma omp for nowait - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - buildAscRadialSection(i_theta); - } - } - - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------ */ - /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ - /* ------------------------------------------------------------------ */ - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; - } - } -#endif -} -// clang-format on diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl new file mode 100644 index 00000000..e5c7384c --- /dev/null +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -0,0 +1,340 @@ +#include "../../../include/Smoother/SmootherTake/smootherTake.h" + +namespace smoother_take +{ + +static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, + double value) +{ + if (row == column) + solver.main_diagonal(batch, row) = value; + else if (row == column - 1) + solver.sub_diagonal(batch, row) = value; + else if (row == 0 && column == solver.matrixDimension() - 1) + solver.cyclic_corner(batch) = value; +} + +} // namespace smoother_take + +template +void SmootherTake::nodeBuildTridiagonalSolverMatrices( + int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) +{ + using smoother_take::updateMatrixElement; + + assert(i_r >= 0 && i_r < grid.nr()); + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + const int numberSmootherCircles = grid.numberSmootherCircles(); + const int lengthSmootherRadial = grid.lengthSmootherRadial(); + + assert(numberSmootherCircles >= 2); + assert(lengthSmootherRadial >= 3); + + int row, column; + double value; + /* ------------------------------------------ */ + /* Node in the interior of the Circle Section */ + /* ------------------------------------------ */ + if (i_r > 0 && i_r < numberSmootherCircles) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = circle_tridiagonal_solver; + const int batch = i_r; + + const int center_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Bottom */ + row = center_index; + column = bottom_index; + value = bottom_value; + updateMatrixElement(solver, batch, row, column, value); + + /* Top */ + row = center_index; + column = top_index; + value = top_value; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------ */ + /* Node in the interior of the Radial Section */ + /* ------------------------------------------ */ + else if (i_r > numberSmootherCircles && i_r < grid.nr() - 2) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int left_index = i_r - numberSmootherCircles - 1; + const int right_index = i_r - numberSmootherCircles + 1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Left */ + row = center_index; + column = left_index; + value = left_value; + updateMatrixElement(solver, batch, row, column, value); + + /* Right */ + row = center_index; + column = right_index; + value = right_value; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------ */ + /* Circle Section: Node in the inner boundary */ + /* ------------------------------------------ */ + else if (i_r == 0) { + // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + auto& solver = circle_tridiagonal_solver; + const int batch = i_r; + + const int center_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Bottom */ + row = center_index; + column = bottom_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Top */ + row = center_index; + column = top_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + } + /* --------------------------------------------- */ + /* Radial Section: Node next to circular section */ + /* --------------------------------------------- */ + else if (i_r == numberSmootherCircles) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int right_index = i_r - numberSmootherCircles + 1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Right */ + row = center_index; + column = right_index; + value = right_value; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------- */ + /* Radial Section: Node next to outer boundary */ + /* ------------------------------------------- */ + else if (i_r == grid.nr() - 2) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int left_index = i_r - numberSmootherCircles - 1; + const int right_index = i_r - numberSmootherCircles + 1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Left */ + row = center_index; + column = left_index; + value = left_value; + updateMatrixElement(solver, batch, row, column, value); + + /* Right: NOT INCLUDED! */ + row = center_index; + column = right_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------ */ + /* Radial Section: Node on the outer boundary */ + /* ------------------------------------------ */ + else if (i_r == grid.nr() - 1) { + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int left_index = i_r - numberSmootherCircles - 1; + + /* Fill matrix row of (i,j) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Left: NOT INCLUDED */ + row = center_index; + column = left_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + } +} + +template +void SmootherTake::buildTridiagonalSolverMatrices() +{ + const PolarGrid& grid = Smoother::grid_; + const LevelCache& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int num_omp_threads = Smoother::num_omp_threads_; + + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); + +#pragma omp parallel num_threads(num_omp_threads) + { +#pragma omp for nowait + for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } + } + +#pragma omp for nowait + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } + } + } +} \ No newline at end of file diff --git a/include/Smoother/SmootherTake/initializeMumps.inl b/include/Smoother/SmootherTake/initializeMumps.inl deleted file mode 100644 index c871dbf7..00000000 --- a/include/Smoother/SmootherTake/initializeMumps.inl +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once - -#ifdef GMGPOLAR_USE_MUMPS - -template -void SmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. - ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: Smoother inner boundary matrix is not positive definite: Negative pivots in the " - "factorization phase." - << std::endl; - } -} - -template -void SmootherTake::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/include/Smoother/SmootherTake/matrixStencil.inl b/include/Smoother/SmootherTake/matrixStencil.inl index 0f57d970..96eb37a3 100644 --- a/include/Smoother/SmootherTake/matrixStencil.inl +++ b/include/Smoother/SmootherTake/matrixStencil.inl @@ -10,6 +10,7 @@ const Stencil& SmootherTake::getStencil(int i_r) const assert(i_r == 0); const bool DirBC_Interior = Smoother::DirBC_Interior_; + return DirBC_Interior ? stencil_DB_ : circle_stencil_across_origin_; } @@ -22,8 +23,9 @@ int SmootherTake::getNonZeroCountCircleAsc(int i_r) const // because it has an additional across-origin coupling. assert(i_r == 0); - const PolarGrid& grid = Smoother::grid_; - const bool DirBC_Interior = Smoother::DirBC_Interior_; + const PolarGrid& grid = Smoother::grid_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 4; return size_stencil_inner_boundary * grid.ntheta(); } @@ -38,7 +40,8 @@ int SmootherTake::getCircleAscIndex(int i_r, int i_theta) const // because it has an additional across-origin coupling. assert(i_r == 0); - const bool DirBC_Interior = Smoother::DirBC_Interior_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 4; return size_stencil_inner_boundary * i_theta; } diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 235c6758..6b90d0d2 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -55,49 +55,19 @@ class SmootherTake : public Smoother const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. - ~SmootherTake() override; - // Performs one full coupled smoothing sweep: // BC -> WC -> BR -> WR // using temp as RHS workspace. void smoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. - BatchedTridiagonalSolver circle_tridiagonal_solver_; - - // Batched solver for tridiagonal radial circle line A_sc matrices. - BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be build in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. @@ -119,6 +89,45 @@ class SmootherTake : public Smoother }; // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it potentially includes across-origin coupling. Therefore, it is assembled + // into a sparse matrix and solved using a general-purpose sparse solver. + // When using the MUMPS solver, the matrix is assembled in COO format. + // When using the in-house solver, the matrix is stored in CSR format. + +#ifdef GMGPOLAR_USE_MUMPS + // When using the MUMPS solver, the matrix is assembled in COO format. + using MatrixType = SparseMatrixCOO; + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver inner_boundary_mumps_solver_; +#else + // When using the in-house solver, the matrix is stored in CSR format. + using MatrixType = SparseMatrixCSR; + // Sparse matrix for the non-tridiagonal inner boundary circle block. + MatrixType inner_boundary_circle_matrix_; + // LU solver for the inner boundary circle block. + SparseLUSolver inner_boundary_lu_solver_; +#endif + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -130,20 +139,23 @@ class SmootherTake : public Smoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. - void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. - void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, - ConstVector& att, ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); + void buildTridiagonalSolverMatrices(); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, + ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + MatrixType buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. + void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -151,8 +163,10 @@ class SmootherTake : public Smoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ /* Line-wise solvers */ @@ -170,21 +184,11 @@ class SmootherTake : public Smoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; #include "smootherTake.inl" -#include "buildMatrix.inl" +#include "buildInnerBoundaryAsc.inl" +#include "buildTridiagonalAsc.inl" #include "applyAscOrtho.inl" #include "solveAscSystem.inl" #include "matrixStencil.inl" -#include "initializeMumps.inl" diff --git a/include/Smoother/SmootherTake/smootherTake.inl b/include/Smoother/SmootherTake/smootherTake.inl index ce0fb589..4f76482e 100644 --- a/include/Smoother/SmootherTake/smootherTake.inl +++ b/include/Smoother/SmootherTake/smootherTake.inl @@ -9,21 +9,16 @@ SmootherTake::SmootherTake(const PolarGrid& grid, const LevelCac num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) -{ - buildAscMatrices(); #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + , inner_boundary_mumps_solver_(buildInteriorBoundarySolverMatrix()) #else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_lu_solver_(inner_boundary_circle_matrix_) #endif -} - -template -SmootherTake::~SmootherTake() { -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif + buildTridiagonalSolverMatrices(); + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); } // The smoothing solves linear systems of the form: @@ -54,46 +49,15 @@ void SmootherTake::smoothing(Vector x, ConstVector::level_cache_.cacheDensityProfileCoefficients()); - assert(Smoother::level_cache_.cacheDomainGeometry()); - - const PolarGrid& grid = Smoother::grid_; - const int num_omp_threads = Smoother::num_omp_threads_; - - /* The outer most circle next to the radial section is defined to be black. */ - /* Priority: Black -> White. */ - const int start_black_circles = (grid.numberSmootherCircles() % 2 == 0) ? 1 : 0; - const int start_white_circles = (grid.numberSmootherCircles() % 2 == 0) ? 0 : 1; - - /* Black Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoBlackCircleSection(x, rhs, temp); solveBlackCircleSection(x, temp); - /* White Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoWhiteCircleSection(x, rhs, temp); solveWhiteCircleSection(x, temp); - /* Black Radial Section */ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoBlackRadialSection(x, rhs, temp); solveBlackRadialSection(x, temp); - /* White Radial Section*/ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoWhiteRadialSection(x, rhs, temp); solveWhiteRadialSection(x, temp); -} +} \ No newline at end of file diff --git a/include/Smoother/SmootherTake/solveAscSystem.inl b/include/Smoother/SmootherTake/solveAscSystem.inl index 95c237c0..aa0ef6e8 100644 --- a/include/Smoother/SmootherTake/solveAscSystem.inl +++ b/include/Smoother/SmootherTake/solveAscSystem.inl @@ -17,18 +17,11 @@ void SmootherTake::solveBlackCircleSection(Vector x, Vec circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_black) { + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_.solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } @@ -59,18 +52,12 @@ void SmootherTake::solveWhiteCircleSection(Vector x, Vec circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_white) { + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_.solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } diff --git a/include/Smoother/smoother.h b/include/Smoother/smoother.h index 19eb78ef..07f95051 100644 --- a/include/Smoother/smoother.h +++ b/include/Smoother/smoother.h @@ -5,6 +5,7 @@ #include #include "../InputFunctions/domainGeometry.h" +#include "../InputFunctions/densityProfileCoefficients.h" template class LevelCache; @@ -12,7 +13,6 @@ class LevelCache; template class Level; -#include "../InputFunctions/densityProfileCoefficients.h" #include "../Level/level.h" #include "../PolarGrid/polargrid.h" #include "../Definitions/global_definitions.h" @@ -22,6 +22,7 @@ class Level; #include "../LinearAlgebra/Matrix/coo_matrix.h" #include "../LinearAlgebra/Matrix/csr_matrix.h" #include "../LinearAlgebra/Solvers/csr_lu_solver.h" +#include "../LinearAlgebra/Solvers/coo_mumps_solver.h" #include "../Stencil/stencil.h" #ifdef GMGPOLAR_USE_MUMPS From eac2b4af4f20c72ae17de852ab6dadf9b24b9500 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Wed, 18 Mar 2026 21:52:58 +0100 Subject: [PATCH 9/9] Implement suggestion --- .../directSolverGive.inl | 2 +- .../directSolverTake.inl | 2 +- .../LinearAlgebra/Solvers/coo_mumps_solver.h | 2 +- .../SmootherTake/buildInnerBoundaryAsc.inl | 5 ++- include/Smoother/SmootherTake/smootherTake.h | 41 ++++++++++--------- .../Smoother/SmootherTake/smootherTake.inl | 4 +- .../Smoother/SmootherTake/solveAscSystem.inl | 13 +----- .../Solvers/coo_mumps_solver.cpp | 2 +- .../Solvers/coo_mumps_solver.cpp | 4 +- 9 files changed, 35 insertions(+), 40 deletions(-) diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl index 5d42128b..fec3823e 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.inl @@ -22,7 +22,7 @@ void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector so // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - mumps_solver_.solve(solution); + mumps_solver_.solveInPlace(solution); } #endif diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl index 1e6aab16..07536127 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.inl @@ -22,7 +22,7 @@ void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector so // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - mumps_solver_.solve(solution); + mumps_solver_.solveInPlace(solution); } #endif diff --git a/include/LinearAlgebra/Solvers/coo_mumps_solver.h b/include/LinearAlgebra/Solvers/coo_mumps_solver.h index 1a0d72f8..a4eb6dab 100644 --- a/include/LinearAlgebra/Solvers/coo_mumps_solver.h +++ b/include/LinearAlgebra/Solvers/coo_mumps_solver.h @@ -37,7 +37,7 @@ class CooMumpsSolver ~CooMumpsSolver(); // rhs is overwritten in-place with the solution on return. - void solve(Vector& rhs); + void solveInPlace(Vector& rhs); private: void initialize(); diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 48e8a23d..8446d57d 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -26,7 +26,7 @@ static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR& matrix, template void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( - int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& matrix, ConstVector& arr, + int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, ConstVector& arr, ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) { using smoother_take::update_CSR_COO_MatrixElement; @@ -131,7 +131,8 @@ void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( } template -typename SmootherTake::MatrixType SmootherTake::buildInteriorBoundarySolverMatrix() +typename SmootherTake::InnerBoundaryMatrix +SmootherTake::buildInteriorBoundarySolverMatrix() { const PolarGrid& grid = Smoother::grid_; const LevelCache& level_cache = Smoother::level_cache_; diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 6b90d0d2..e567998e 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -108,26 +108,29 @@ class SmootherTake : public Smoother /* Interior boundary solver */ /* ------------------------ */ - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. + // The inner circle matrix (i_r = 0) is NOT tridiagonal due to across-origin coupling. + // It is solved using a general-purpose sparse solver. + // - MUMPS: matrix assembled in COO format; solver owns the matrix internally. + // - In-house: matrix stored in CSR; solver does not own the matrix. #ifdef GMGPOLAR_USE_MUMPS - // When using the MUMPS solver, the matrix is assembled in COO format. - using MatrixType = SparseMatrixCOO; - // MUMPS solver structure with the solver matrix initialized in the constructor. - CooMumpsSolver inner_boundary_mumps_solver_; + using InnerBoundaryMatrix = SparseMatrixCOO; + using InnerBoundarySolver = CooMumpsSolver; #else - // When using the in-house solver, the matrix is stored in CSR format. - using MatrixType = SparseMatrixCSR; - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - // LU solver for the inner boundary circle block. - SparseLUSolver inner_boundary_lu_solver_; + using InnerBoundaryMatrix = SparseMatrixCSR; + using InnerBoundarySolver = SparseLUSolver; + + // Stored only for the in-house solver (CSR). + InnerBoundaryMatrix inner_boundary_circle_matrix_; #endif + // Solver object (owns matrix if MUMPS, references if in-house solver). + InnerBoundarySolver inner_boundary_solver_; + + /* -------------- */ + /* Stencil access */ + /* -------------- */ + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -150,12 +153,12 @@ class SmootherTake : public Smoother ConstVector& coeff_beta); // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. - MatrixType buildInteriorBoundarySolverMatrix(); + InnerBoundaryMatrix buildInteriorBoundarySolverMatrix(); // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& matrix, ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); + InnerBoundaryMatrix& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta); /* ---------------------- */ /* Orthogonal application */ diff --git a/include/Smoother/SmootherTake/smootherTake.inl b/include/Smoother/SmootherTake/smootherTake.inl index 4f76482e..083ed106 100644 --- a/include/Smoother/SmootherTake/smootherTake.inl +++ b/include/Smoother/SmootherTake/smootherTake.inl @@ -10,10 +10,10 @@ SmootherTake::SmootherTake(const PolarGrid& grid, const LevelCac , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) #ifdef GMGPOLAR_USE_MUMPS - , inner_boundary_mumps_solver_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_solver_(buildInteriorBoundarySolverMatrix()) #else , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) - , inner_boundary_lu_solver_(inner_boundary_circle_matrix_) + , inner_boundary_solver_(inner_boundary_circle_matrix_) #endif { buildTridiagonalSolverMatrices(); diff --git a/include/Smoother/SmootherTake/solveAscSystem.inl b/include/Smoother/SmootherTake/solveAscSystem.inl index aa0ef6e8..1e8bc966 100644 --- a/include/Smoother/SmootherTake/solveAscSystem.inl +++ b/include/Smoother/SmootherTake/solveAscSystem.inl @@ -18,11 +18,7 @@ void SmootherTake::solveBlackCircleSection(Vector x, Vec if (is_inner_circle_black) { Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.solve(inner_boundary); -#else - inner_boundary_lu_solver_.solveInPlace(inner_boundary); -#endif + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x @@ -53,12 +49,7 @@ void SmootherTake::solveWhiteCircleSection(Vector x, Vec if (is_inner_circle_white) { Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); - -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.solve(inner_boundary); -#else - inner_boundary_lu_solver_.solveInPlace(inner_boundary); -#endif + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp index 84ec9af1..7080453c 100644 --- a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -23,7 +23,7 @@ CooMumpsSolver::~CooMumpsSolver() finalize(); } -void CooMumpsSolver::solve(Vector& rhs) +void CooMumpsSolver::solveInPlace(Vector& rhs) { assert(std::ssize(rhs) == mumps_solver_.n); diff --git a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp index eb3344c9..46724c6b 100644 --- a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -36,7 +36,7 @@ TEST(CooMumpsSolverTest, GeneralNonSymmetric4x4) rhs(2) = 6.0; rhs(3) = 8.0; - solver.solve(rhs); + solver.solveInPlace(rhs); Vector solution("solution", 4); solution(0) = 140.0 / 43.0; @@ -80,7 +80,7 @@ TEST(CooMumpsSolverTest, SymmetricPositiveDefinite4x4) rhs(2) = 6.0; rhs(3) = 8.0; - solver.solve(rhs); + solver.solveInPlace(rhs); Vector solution("solution", 4); solution(0) = 9.0 / 46.0;