// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project
// (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy
// organizations (Office of Science and the National Nuclear Security
// Administration) responsible for the planning and preparation of a capable
// exascale ecosystem, including software, applications, hardware, advanced
// system engineering and early testbed platforms, in support of the nation's
// exascale computing imperative.

//
//           CEED Bake-off Problem 1 (based on MFEM Example 1p)
//
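// Sample runs (the binary name 'bp1' below is an assumption; substitute the
// actual build target):
//
//    mpirun -np 4 bp1 -l 3 -o 2
//    mpirun -np 8 bp1 -dim 3 -l 6 -o 3 -d cuda
//
// Note: the number of MPI ranks must be a power of 2 (see make_mesh below).
//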

#include "mfem.hpp"
#include <fstream>
#include <iostream>
#include <cmath> // floor() and log(), used in make_mesh()

using namespace std;
using namespace mfem;

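// Build the serial Cartesian hexahedral mesh for the benchmark. On return,
// 'par_ref_levels' is set to the number of uniform parallel refinements to
// apply and 'nxyz' to the Cartesian processor decomposition (see the
// definition after main() for details).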
Mesh *make_mesh(int myid, int num_procs, int dim, int level,
                int &par_ref_levels, Array<int> &nxyz);

int main(int argc, char *argv[])
{
   // 1. Initialize MPI.
   int num_procs, myid;
   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);

   // 2. Parse command-line options.
   int dim = 3;
   int level = 0;
   int order = 1;
   const char *device_config = "cpu";

   OptionsParser args(argc, argv);
   args.AddOption(&dim, "-dim", "--mesh-dimension",
                  "Mesh dimension (only 3D is currently implemented).");
   args.AddOption(&level, "-l", "--refinement-level",
                  "Set the problem size: 2^level mesh elements per processor.");
   args.AddOption(&order, "-o", "--order",
                  "Finite element order (polynomial degree).");
   args.AddOption(&device_config, "-d", "--device",
                  "Device configuration string, see Device::Configure().");
   args.Parse();
   if (!args.Good())
   {
      if (myid == 0)
      {
         args.PrintUsage(cout);
      }
      MPI_Finalize();
      return 1;
   }
   if (myid == 0)
   {
      args.PrintOptions(cout);
   }

   // 3. Enable hardware devices such as GPUs, and programming models such as
   //    CUDA, OCCA, RAJA and OpenMP based on command line options.
   Device device(device_config);
   if (myid == 0) { device.Print(); }

   // 4. Construct the serial mesh on all processors. Instead of reading a
   //    mesh file, this benchmark builds a Cartesian hexahedral mesh sized by
   //    'level'; make_mesh also determines the processor decomposition and
   //    the number of parallel refinement levels.
   int par_ref_levels;
   Array<int> nxyz;
   Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz);

   // 5. Serial refinement of the mesh is skipped here: make_mesh already
   //    builds the serial mesh at the desired resolution.
   {
      // skip
   }

   // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine
   //    this mesh further in parallel to increase the resolution. Once the
   //    parallel mesh is defined, the serial mesh can be deleted.
   int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL;
   ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning);
   delete [] partitioning;
   delete mesh;
   for (int l = 0; l < par_ref_levels; l++)
   {
      pmesh->UniformRefinement();
   }
   // pmesh->PrintInfo();
   long global_ne = pmesh->ReduceInt(pmesh->GetNE());
   if (myid == 0)
   {
      cout << "Total number of elements: " << global_ne << endl;
   }

   // 7. Define a parallel finite element space on the parallel mesh. Here we
   //    use continuous Lagrange finite elements of the specified order, which
   //    must be at least 1.
   FiniteElementCollection *fec;
   MFEM_VERIFY(order > 0, "invalid 'order': " << order);
   fec = new H1_FECollection(order, dim);
   ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec);
   HYPRE_Int size = fespace->GlobalTrueVSize();
   if (myid == 0)
   {
      cout << "Number of finite element unknowns: " << size << endl;
   }

   // 8. Determine the list of true (i.e. parallel conforming) essential
   //    boundary dofs. In this example, the boundary conditions are defined
   //    by marking all the boundary attributes from the mesh as essential
   //    (Dirichlet) and converting them to a list of true dofs.
   Array<int> ess_tdof_list;
   if (pmesh->bdr_attributes.Size())
   {
      Array<int> ess_bdr(pmesh->bdr_attributes.Max());
      ess_bdr = 1;
      fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list);
   }

   // 9. Set up the parallel linear form b(.) which corresponds to the
   //    right-hand side of the FEM linear system, which in this case is
   //    (1,phi_i) where phi_i are the basis functions in fespace.
   ParLinearForm *b = new ParLinearForm(fespace);
   ConstantCoefficient one(1.0);
   b->AddDomainIntegrator(new DomainLFIntegrator(one));
   b->Assemble();

   // 10. Define the solution vector x as a parallel finite element grid
   //     function corresponding to fespace. Initialize x with initial guess
   //     of zero, which satisfies the boundary conditions.
   ParGridFunction x(fespace);
   x = 0.0;

   // 11. Set up the parallel bilinear form a(.,.) on the finite element space
   //     corresponding to the mass operator, by adding the Mass domain
   //     integrator: BP1 solves a mass-matrix system, not a diffusion one.
   ParBilinearForm *a = new ParBilinearForm(fespace);
   a->SetAssemblyLevel(AssemblyLevel::PARTIAL);
   a->AddDomainIntegrator(new MassIntegrator(one));

   // 12. Assemble the parallel bilinear form and the corresponding linear
   //     system, applying any necessary transformations such as: parallel
   //     assembly, eliminating boundary conditions, applying conforming
   //     constraints for non-conforming AMR, static condensation, etc.
   a->Assemble();

   OperatorPtr A;
   Vector B, X;
   a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B);

   // 13. Solve the linear system A X = B. Since the form uses partial
   //     assembly, no preconditioner is set up; plain CG is used, for now.
   const int max_cg_iter = 200;
   const int cg_print_level = 3;
   CGSolver cg(MPI_COMM_WORLD);
   cg.SetRelTol(1e-12);
   cg.SetMaxIter(max_cg_iter);
   cg.SetPrintLevel(cg_print_level);
   cg.SetOperator(*A);

   // Warm-up CG solve (in case of JIT, to avoid timing it below).
   {
      Vector Xtmp(X);
      cg.SetMaxIter(2);
      cg.SetPrintLevel(-1);
      cg.Mult(B, Xtmp);
      cg.SetMaxIter(max_cg_iter);
      cg.SetPrintLevel(cg_print_level);
   }

   // Sync all ranks.
   MPI_Barrier(pmesh->GetComm());

   // Start CG timing.
   tic_toc.Clear();
   tic_toc.Start();

   cg.Mult(B, X);

   // Stop CG timing. Print timing results.
   tic_toc.Stop();
   double rt_min, rt_max, my_rt;
   my_rt = tic_toc.RealTime();
   MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm());
   MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm());
   if (myid == 0)
   {
      int cg_iter = cg.GetNumIterations();
      // Note: In the PCG algorithm, the number of operator Mult() calls is
      // N_iter and the number of preconditioner Mult() calls is N_iter+1.
      cout << '\n'
           << "Total CG time: " << rt_max << " (" << rt_min << ") sec."
           << endl;
      cout << "Time per CG step: "
           << rt_max / cg_iter << " ("
           << rt_min / cg_iter << ") sec." << endl;
      // "DOFs/sec" = (global true dofs) * (CG iterations) / (solve time),
      // i.e. the rate at which CG processes unknowns.
      cout << "\n\"DOFs/sec\" in CG: "
           << 1e-6*size*cg_iter/rt_max << " ("
           << 1e-6*size*cg_iter/rt_min << ") million.\n"
           << endl;
   }

   // 14. Recover the parallel grid function corresponding to X. This is the
   //     local finite element solution on each processor.
   a->RecoverFEMSolution(X, *b, x);

   // 15. Save the refined mesh and the solution in parallel. This output can
   //     be viewed later using GLVis: "glvis -np <np> -m mesh -g sol".
   {
      // skip
   }

   // 16. Send the solution by socket to a GLVis server.
   // if (visualization)
   {
      // skip
   }

   // 17. Free the used memory.
   delete a;
   delete b;
   delete fespace;
   delete fec;
   delete pmesh;

   MPI_Finalize();

   return 0;
}

Mesh *make_mesh(int myid, int num_procs, int dim, int level,
                int &par_ref_levels, Array<int> &nxyz)
{
   int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5);
   MFEM_VERIFY((1 << log_p) == num_procs,
               "number of processors is not a power of 2: " << num_procs);
   MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!");

   // Determine the processor decomposition: distribute the log2(num_procs)
   // factors of 2 over the three dimensions, giving the extra ones to x
   // first, then y.
   int s[3];
   s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0);
   s[1] = log_p/3 + (log_p%3 > 1 ? 1 : 0);
   s[2] = log_p/3;
   nxyz.SetSize(dim);
   nxyz[0] = 1 << s[0];
   nxyz[1] = 1 << s[1];
   nxyz[2] = 1 << s[2];

   // Determine the mesh size. Each increment of 'level' doubles the total
   // number of elements: level%3 of the doublings are applied to the serial
   // mesh, and level/3 uniform refinements (8x elements each, in 3D) are
   // applied in parallel.
   int ser_level = level%3;
   par_ref_levels = level/3;
   int log_n = log_p + ser_level;
   int t[3];
   t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0);
   t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0);
   t[2] = log_n/3;
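
   // Worked example (illustration only): with num_procs = 8 and level = 5,
   // log_p = 3, so nxyz = (2,2,2); ser_level = 2 and par_ref_levels = 1 give
   // log_n = 5 and t = (2,2,1), i.e. a 4x4x2 serial mesh (32 elements) that
   // one parallel refinement grows to 8x8x4 = 256 elements = 8 * 2^5.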

   // Create the serial Mesh: 2^t[0] x 2^t[1] x 2^t[2] hexahedra on the unit
   // cube, ordered along a space-filling curve.
   const bool gen_edges = true;
   const bool sfc_ordering = true;
   Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2],
                         Element::HEXAHEDRON, gen_edges,
                         1.0, 1.0, 1.0, sfc_ordering);
   if (myid == 0)
   {
      cout << "Processor partitioning: ";
      nxyz.Print(cout, dim);

      // Mesh dimensions AFTER parallel refinement:
      cout << "Mesh dimensions: "
           << (1 << (t[0]+par_ref_levels)) << ' '
           << (1 << (t[1]+par_ref_levels)) << ' '
           << (1 << (t[2]+par_ref_levels)) << endl;
   }

   return mesh;
}