
Commit 3dab032

Add bp1 files
1 parent 4655586 commit 3dab032

2 files changed: +377 -0 lines changed


tests/mfem4_bps/bp1.cpp

Lines changed: 291 additions & 0 deletions
@@ -0,0 +1,291 @@
// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project
// (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy
// organizations (Office of Science and the National Nuclear Security
// Administration) responsible for the planning and preparation of a capable
// exascale ecosystem, including software, applications, hardware, advanced
// system engineering and early testbed platforms, in support of the nation's
// exascale computing imperative.

//
// CEED Bake-off Problem 1 (based on MFEM Example 1p)
//

#include "mfem.hpp"
#include <fstream>
#include <iostream>

using namespace std;
using namespace mfem;

Mesh *make_mesh(int myid, int num_procs, int dim, int level,
                int &par_ref_levels, Array<int> &nxyz);

int main(int argc, char *argv[])
{
   // 1. Initialize MPI.
   int num_procs, myid;
   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);

   // 2. Parse command-line options.
   int dim = 3;
   int level = 0;
   int order = 1;
   const char *device_config = "cpu";

   OptionsParser args(argc, argv);
   args.AddOption(&dim, "-dim", "--mesh-dimension",
                  "Solve 2D or 3D problem.");
   args.AddOption(&level, "-l", "--refinement-level",
                  "Set the problem size: 2^level mesh elements per processor.");
   args.AddOption(&order, "-o", "--order",
                  "Finite element order (polynomial degree).");
   args.AddOption(&device_config, "-d", "--device",
                  "Device configuration string, see Device::Configure().");
   args.Parse();
   if (!args.Good())
   {
      if (myid == 0)
      {
         args.PrintUsage(cout);
      }
      MPI_Finalize();
      return 1;
   }
   if (myid == 0)
   {
      args.PrintOptions(cout);
   }

   // 3. Enable hardware devices such as GPUs, and programming models such as
   //    CUDA, OCCA, RAJA and OpenMP based on command line options.
   Device device(device_config);
   if (myid == 0) { device.Print(); }

   // 4. Construct the serial mesh on all processors. For this benchmark, the
   //    mesh is a generated Cartesian hexahedral mesh whose size is chosen in
   //    make_mesh() from the number of processors and the refinement level.
   int par_ref_levels;
   Array<int> nxyz;
   Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz);

   // 5. Refine the serial mesh on all processors to increase the resolution. In
   //    this example we do 'ref_levels' of uniform refinement. We choose
   //    'ref_levels' to be the largest number that gives a final mesh with no
   //    more than 10,000 elements.
   {
      // skip
   }

   // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine
   //    this mesh further in parallel to increase the resolution. Once the
   //    parallel mesh is defined, the serial mesh can be deleted.
   int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL;
   ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning);
   delete [] partitioning;
   delete mesh;
   for (int l = 0; l < par_ref_levels; l++)
   {
      pmesh->UniformRefinement();
   }
   // pmesh->PrintInfo();
   long global_ne = pmesh->ReduceInt(pmesh->GetNE());
   if (myid == 0)
   {
      cout << "Total number of elements: " << global_ne << endl;
   }

   // 7. Define a parallel finite element space on the parallel mesh. Here we
   //    use continuous Lagrange (H1) finite elements of the specified order,
   //    which must be at least 1.
   FiniteElementCollection *fec;
   MFEM_VERIFY(order > 0, "invalid 'order': " << order);
   fec = new H1_FECollection(order, dim);
   ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec);
   HYPRE_Int size = fespace->GlobalTrueVSize();
   if (myid == 0)
   {
      cout << "Number of finite element unknowns: " << size << endl;
   }

   // 8. Determine the list of true (i.e. parallel conforming) essential
   //    boundary dofs. In this example, the boundary conditions are defined
   //    by marking all the boundary attributes from the mesh as essential
   //    (Dirichlet) and converting them to a list of true dofs.
   Array<int> ess_tdof_list;
   if (pmesh->bdr_attributes.Size())
   {
      Array<int> ess_bdr(pmesh->bdr_attributes.Max());
      ess_bdr = 1;
      fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list);
   }

   // 9. Set up the parallel linear form b(.) which corresponds to the
   //    right-hand side of the FEM linear system, which in this case is
   //    (1,phi_i) where phi_i are the basis functions in fespace.
   ParLinearForm *b = new ParLinearForm(fespace);
   ConstantCoefficient one(1.0);
   b->AddDomainIntegrator(new DomainLFIntegrator(one));
   b->Assemble();

   // 10. Define the solution vector x as a parallel finite element grid function
   //     corresponding to fespace. Initialize x with initial guess of zero,
   //     which satisfies the boundary conditions.
   ParGridFunction x(fespace);
   x = 0.0;

   // 11. Set up the parallel bilinear form a(.,.) on the finite element space
   //     corresponding to the mass operator (this is BP1), by adding the Mass
   //     domain integrator.
   ParBilinearForm *a = new ParBilinearForm(fespace);
   a->SetAssemblyLevel(AssemblyLevel::PARTIAL);
   a->AddDomainIntegrator(new MassIntegrator(one));

   // 12. Assemble the parallel bilinear form and the corresponding linear
   //     system, applying any necessary transformations such as: parallel
   //     assembly, eliminating boundary conditions, applying conforming
   //     constraints for non-conforming AMR, static condensation, etc.
   a->Assemble();

   OperatorPtr A;
   Vector B, X;
   a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B);

   // 13. Solve the linear system A X = B.
   //     * With full assembly, use the BoomerAMG preconditioner from hypre.
   //     * With partial assembly, use no preconditioner, for now.
   const int max_cg_iter = 200;
   const int cg_print_level = 3;
   CGSolver cg(MPI_COMM_WORLD);
   cg.SetRelTol(1e-12);
   cg.SetMaxIter(max_cg_iter);
   cg.SetPrintLevel(cg_print_level);
   cg.SetOperator(*A);

   // Warm-up CG solve (in case of JIT to avoid timing it)
   {
      Vector Xtmp(X);
      cg.SetMaxIter(2);
      cg.SetPrintLevel(-1);
      cg.Mult(B, Xtmp);
      cg.SetMaxIter(max_cg_iter);
      cg.SetPrintLevel(cg_print_level);
   }

   // Sync all ranks
   MPI_Barrier(pmesh->GetComm());

   // Start CG timing.
   tic_toc.Clear();
   tic_toc.Start();

   cg.Mult(B, X);

   // Stop CG timing. Print timing results.
   tic_toc.Stop();
   double rt_min, rt_max, my_rt;
   my_rt = tic_toc.RealTime();
   MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm());
   MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm());
   if (myid == 0)
   {
      int cg_iter = cg.GetNumIterations();
      // Note: In the pcg algorithm, the number of operator Mult() calls is
      //       N_iter and the number of preconditioner Mult() calls is N_iter+1.
      cout << '\n'
           << "Total CG time: " << rt_max << " (" << rt_min << ") sec."
           << endl;
      cout << "Time per CG step: "
           << rt_max / cg_iter << " ("
           << rt_min / cg_iter << ") sec." << endl;
      cout << "\n\"DOFs/sec\" in CG: "
           << 1e-6*size*cg_iter/rt_max << " ("
           << 1e-6*size*cg_iter/rt_min << ") million.\n"
           << endl;
   }

   // 14. Recover the parallel grid function corresponding to X. This is the
   //     local finite element solution on each processor.
   a->RecoverFEMSolution(X, *b, x);

   // 15. Save the refined mesh and the solution in parallel. This output can
   //     be viewed later using GLVis: "glvis -np <np> -m mesh -g sol".
   {
      // skip
   }

   // 16. Send the solution by socket to a GLVis server.
   // if (visualization)
   {
      // skip
   }

   // 17. Free the used memory.
   delete a;
   delete b;
   delete fespace;
   delete fec;
   delete pmesh;

   MPI_Finalize();

   return 0;
}

Mesh *make_mesh(int myid, int num_procs, int dim, int level,
                int &par_ref_levels, Array<int> &nxyz)
{
   int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5);
   MFEM_VERIFY((1 << log_p) == num_procs,
               "number of processors is not a power of 2: " << num_procs);
   MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!");

   // Determine processor decomposition.
   int s[3];
   s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0);
   s[1] = log_p/3 + (log_p%3 > 1 ? 1 : 0);
   s[2] = log_p/3;
   nxyz.SetSize(dim);
   nxyz[0] = 1 << s[0];
   nxyz[1] = 1 << s[1];
   nxyz[2] = 1 << s[2];

   // Determine mesh size.
   int ser_level = level%3;
   par_ref_levels = level/3;
   int log_n = log_p + ser_level;
   int t[3];
   t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0);
   t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0);
   t[2] = log_n/3;

   // Create the Mesh.
   const bool gen_edges = true;
   const bool sfc_ordering = true;
   Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2],
                         Element::HEXAHEDRON, gen_edges,
                         1.0, 1.0, 1.0, sfc_ordering);
   if (myid == 0)
   {
      cout << "Processor partitioning: ";
      nxyz.Print(cout, dim);

      // Mesh dimensions AFTER parallel refinement:
      cout << "Mesh dimensions: "
           << (1 << (t[0]+par_ref_levels)) << ' '
           << (1 << (t[1]+par_ref_levels)) << ' '
           << (1 << (t[2]+par_ref_levels)) << endl;
   }

   return mesh;
}
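
For reference, a standalone run of this benchmark might look as follows. This is only a sketch: the MPI launcher, the rank count, and the chosen options are illustrative assumptions, not fixed by this commit; the number of ranks must be a power of 2, as enforced in make_mesh().

    # hypothetical example: 8 MPI ranks, order 3, 2^6 elements per rank, CPU device
    mpirun -np 8 ./bp1 -dim 3 -o 3 -l 6 -d cpu

With 8 ranks the processor grid computed by make_mesh() is 2x2x2, and after the warm-up solve the timed CG loop reports the total CG time, the time per CG step, and the "DOFs/sec" figure printed after step 13.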

tests/mfem4_bps/bp1.sh

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
# reserved. See files LICENSE and NOTICE for details.
#
# This file is part of CEED, a collection of benchmarks, miniapps, software
# libraries and APIs for efficient high-order finite element and spectral
# element discretizations for exascale applications. For more information and
# source code availability see http://github.com/ceed.
#
# The CEED research is supported by the Exascale Computing Project
# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy
# organizations (Office of Science and the National Nuclear Security
# Administration) responsible for the planning and preparation of a capable
# exascale ecosystem, including software, applications, hardware, advanced
# system engineering and early testbed platforms, in support of the nation's
# exascale computing imperative.


function build_tests()
{
   local make_opts=(
      "-j" "$num_proc_build"
      "MFEM_DIR=$MFEM_DIR"
      "BLD=$test_exe_dir/")
   quoted_echo make "${make_opts[@]}"
   [[ -n "$dry_run" ]] || make "${make_opts[@]}"
}


function run_tests()
{
   local test_name="bp1"
   set_mpi_options
   # 'min_p' can be set on the command line
   local l_min_p=${min_p:-1}
   # 'max_p' can be set on the command line
   local l_max_p=${max_p:-8}
   # 'max_dofs_proc' can be set on the command line
   local l_max_dofs_proc=${max_dofs_proc:-4200000}
   # 'mfem_devs' can be set on the command line
   local l_mfem_devs=(${mfem_devs:-cpu})
   local dim=3
   local args=
   for dev in ${l_mfem_devs[@]}; do
      for ((p = l_min_p; p <= l_max_p; p++)) do
         for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do
            args=(-o $p -l $l -d $dev)
            if [ -z "$dry_run" ]; then
               echo "Running test:"
               quoted_echo $mpi_run ./$test_name "${args[@]}"
               $mpi_run ./$test_name "${args[@]}" || \
                  printf "\nError in the test, error code: $?\n\n"
            else
               $dry_run $mpi_run ./$test_name "${args[@]}"
            fi
         done
      done
   done
}


function build_and_run_tests()
{
   $dry_run cd "$test_dir"

   build_tests || return 1
   echo

   [[ -n "$build_only" ]] && return

   $dry_run cd "$test_exe_dir"
   run_tests
}


mfem_branch=${mfem_branch:-master}

# test_required_packages="metis hypre cuda openmp occa raja libceed mfem"

# without OpenMP:
#test_required_packages="metis hypre cuda occa raja libceed mfem"

# with just RAJA and CUDA:
#test_required_packages="metis hypre cuda raja mfem"

test_required_packages="metis hypre cuda mfem"
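
The sweep parameters in run_tests() have defaults (min_p=1, max_p=8, max_dofs_proc=4200000, mfem_devs=cpu) and, per the comments, can be overridden from the command line. A hypothetical override, assuming the enclosing benchmark harness forwards these shell variables to the script, could look like:

    # hypothetical: sweep orders 1..4 on the cpu and cuda devices,
    # capping each MPI rank at roughly 1e6 DOFs
    min_p=1 max_p=4 max_dofs_proc=1000000 mfem_devs='cpu cuda'

For each device and order p, the inner loop keeps increasing the refinement level l while (p**dim)*(2**l) stays at or below max_dofs_proc, so each successive test roughly doubles the per-rank problem size until the cap is reached.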

0 commit comments
