Alpine3D Svn Source Tree

Root/trunk/alpine3d/MPIControl.h

/***********************************************************************************/
/* Copyright 2009-2015 WSL Institute for Snow and Avalanche Research SLF-DAVOS     */
/***********************************************************************************/
/* This file is part of Alpine3D.
    Alpine3D is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Alpine3D is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with Alpine3D. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MPIWRAPPER_H
#define MPIWRAPPER_H

#ifdef ENABLE_MPI
#include <mpi.h>
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

#include <meteoio/MeteoIO.h>
#include <cstdio>
/**
 * @class MPIControl
 * @brief A singleton class that deals with all aspects of parallelization within Alpine3D (MPI, OpenMP, PETSc).
 * Any class that wishes to use the MPI/OpenMP/PETSc capabilities should include this header and make
 * sure that the respective cmake compilation options (called MPI, OPENMP, PETSC) are activated. The methods
 * are designed to be transparent and return meaningful values even if a compilation option was not
 * activated. For example, bool master() returns true if MPI was not activated; if it was activated,
 * it returns true solely on the master process and false on all other processes.
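 *
 * A minimal usage sketch (illustrative only, assuming an Alpine3D build with the MPI option enabled
 * and &lt;iostream&gt; included by the caller):
 * @code
 * MPIControl& mpicontrol = MPIControl::instance();
 * if (mpicontrol.master())
 *     std::cout << "Running on " << mpicontrol.size() << " process(es)\n";
 * std::cout << "Rank " << mpicontrol.rank() << " runs on node " << mpicontrol.name() << "\n";
 * mpicontrol.barrier(); // wait until all processes have reached this point
 * @endcode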
 * @author Thomas Egger
 * @date 2014-07-15
 */
class MPIControl
{
public:
    /**
     * Returns the single instance of this class. If an instance wasn't initialized yet, it is instantiated.
     * @return a reference to the single instance
     * @note MPI_Init and PetscInitialize are called entirely with NULL arguments
     */
    static MPIControl& instance();

    // The following methods refer to OpenMP
    /**
     * Returns whether or not OpenMP has been activated
     * @return true if _OPENMP has been defined, false otherwise
     */
    bool openmp() const;

    /**
     * Returns the current OpenMP thread number or 0 if OpenMP has not been activated
     * @return current thread number
     */
    size_t thread() const;

    /**
     * Returns the total number of OpenMP threads or 1 if OpenMP has not been activated
     * @return number of threads
     */
    size_t max_threads() const;

    // The following methods refer to MPI
    /**
     * Returns whether the calling process is the master process or not
     * @return true if caller is master, false otherwise
     */
    bool master() const;

    /**
     * Returns the rank of the master process
     * @return rank of the master process
     */
    size_t master_rank() const;

    /**
     * Returns the rank of the calling process or 0 if MPI was not activated
     * @return rank number of the calling process
     */
    size_t rank() const;

    /**
     * Returns the size of the MPI world in use, or 1 if MPI was not activated
     * @return the size of the MPI world
     */
    size_t size() const;

    /**
     * Returns the name of the processor of the calling process or "local" if MPI was not activated
     * @return string name of the processor
     */
    std::string name() const;

    /**
     * Synchronizes all MPI processes: it blocks the calling process until all
     * processes have called barrier()
     */
    void barrier() const;

    /**
     * Gathers the integer send_values from all processes into a vector of integers on the root node.
     * Index 5 of the receive_vector represents the send_value of process #5.
     * @param[in] send_value The int value a process sends
     * @param[out] receive_vector The buffer for the root process to hold all the send_values
     * @param[in] root The process rank that will gather the send_values
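     *
     * Illustrative sketch (the gathered value is hypothetical):
     * @code
     * const int points_per_process = 42;   // some per-process result
     * std::vector<int> all_points;
     * MPIControl::instance().gather(points_per_process, all_points); // filled on the root process only
     * @endcode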
     */
    void gather(const int& send_value, std::vector<int>& receive_vector, const size_t& root = 0);

    //@{
    /**
     * Combines values from all processes and distributes the result back to all processes.
     * - allreduce_max distributes the maximum of all processes
     * - allreduce_min distributes the minimum of all processes
     * - allreduce_sum distributes the sum of all processes
     * @param[in,out] value The value that is used to perform the reduction and to hold the result
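     *
     * Illustrative sketch (the local value is hypothetical):
     * @code
     * double max_hs = 1.27; // e.g. the maximum snow height within this process' sub-domain
     * MPIControl::instance().allreduce_max(max_hs);
     * // max_hs now holds the maximum over all processes, on every process
     * @endcode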
     */
    void allreduce_max(double& value);
    void allreduce_min(double& value);
    void allreduce_sum(double& value);
    void allreduce_sum(int& value);
    //@}

    /**
     * This method is used when deserializing a class T from a void* representing a char*,
     * instantiating an object from a string
     * @param[in] in The void*, which is actually a char*
     * @param[in] len The length of the string
     * @param[out] obj The newly instantiated object
     * @note class T must support the extraction and insertion operators, >> and <<
     */
    template<class T> static void deserialize(const void* in, const size_t& len, T& obj)
    {
        std::stringstream obj_stream;
        obj_stream.write(reinterpret_cast<const char*>(in), len);
        obj_stream >> obj;
    }

    /**
     * This method is used when serializing a class T, turning an object into a string
     * @param[out] out The void**, which is actually a char**
     * @param[in] obj The object to serialize
     * @param[in] alloc If true, allocate the output buffer (otherwise *out must already point to enough memory)
     * @return The length of the string
     * @note class T must support the extraction and insertion operators, >> and <<
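     *
     * Round-trip sketch (illustrative only, using a plain double as the serialized type):
     * @code
     * double num = 3.14, copy = 0.;
     * void* buffer = NULL;
     * const size_t len = MPIControl::serialize(&buffer, num, true); // allocates the buffer
     * MPIControl::deserialize(buffer, len, copy);                   // copy holds 3.14 again
     * delete[] (char*)buffer;
     * @endcode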
     */
    template<class T> static size_t serialize(void** out, const T& obj, const bool alloc=false)
    {
        std::stringstream obj_stream;
        obj_stream << obj;
        const size_t len = obj_stream.str().size();
        if (alloc) {
            *out = new char[len];
        }
        obj_stream.read(reinterpret_cast<char*>(*out), len);
        return len;
    }

#ifdef ENABLE_MPI
    /**
     * For custom objects of class T a custom sum function is required.
     * MPI_Op_create expects exactly this interface, thus it cannot be changed.
     * @param[in] in The void* representing a summand of the sum operation
     * @param[out] out The void* representing the result of the sum operation
     * @param[in] datatype A pointer to the custom datatype previously committed
     * @note class T must support the extraction and insertion operators, \>\> and \<\<
     * and furthermore support the operator+=
     */
    template <class T> static void op_sum_func(void* in, void* out, int* /*len*/, MPI_Datatype* datatype)
    {
        int tmp_size;
        MPI_Type_size(*datatype, &tmp_size);

        T in_obj, out_obj;

        deserialize(in, static_cast<size_t>(tmp_size), in_obj);
        deserialize(out, static_cast<size_t>(tmp_size), out_obj);

        out_obj += in_obj;

        serialize(&out, out_obj);
    }
#endif

#ifdef ENABLE_MPI
    /** @brief Adds up the values of type T from all processes and distributes the sum back to all processes.
     * @param obj The object that is a summand of the global sum and will hold the result
     * @note class T must support the extraction and insertion operators, \>\> and \<\<
     * and furthermore support the operator+=
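     *
     * Hypothetical sketch ("Totals" stands for any user type providing operator+=, operator<< and operator>>):
     * @code
     * Totals local_totals = computeLocalTotals(); // hypothetical per-process result
     * MPIControl::instance().allreduce_sum(local_totals); // local_totals now holds the global sum
     * @endcode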
     */
    template <class T> void allreduce_sum(T& obj)
    {
        if (size_ <= 1) return;

        MPI_Op op;
        MPI_Op_create(op_sum_func<T>, true, &op);

        void* in_obj_char=NULL;
        void* out_obj_char=NULL;

        const size_t len = serialize(&in_obj_char, obj, true);
        out_obj_char = new char[len];

        MPI_Datatype chars_datatype;
        MPI_Type_contiguous(static_cast<int>(len), MPI_CHAR, &chars_datatype);
        MPI_Type_commit(&chars_datatype);

        MPI_Allreduce(in_obj_char, out_obj_char, 1, chars_datatype, op, MPI_COMM_WORLD);
        MPI_Op_free(&op);
        MPI_Type_free(&chars_datatype); // release the committed datatype

        deserialize(out_obj_char, len, obj);

        delete[] (char*)out_obj_char;
        delete[] (char*)in_obj_char;
    }
#else
    template <class T> void allreduce_sum(T& /*obj*/) {}
#endif

#ifdef ENABLE_MPI
    /**
     * @brief Broadcast an object via MPI, or do nothing in case MPI is not activated.
     * @param obj The object that is broadcast from process root to all processes
     * @param[in] root The process rank that will commit the broadcast value, all others receive only
     * @note Class T needs to have the serialize and deserialize operators << and >> implemented
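     *
     * Illustrative sketch, broadcasting a plain double (any class with both stream operators works the same way):
     * @code
     * double calculation_step = 0.;
     * if (MPIControl::instance().master()) calculation_step = 3600.; // hypothetical value known only to the master
     * MPIControl::instance().broadcast(calculation_step); // now 3600. on every process
     * @endcode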
     */
    template <class T> void broadcast(T& obj, const size_t& root = 0)
    {
        if (size_ <= 1) return;

        std::string obj_string;

        if (rank_ == root) { // Build the string that represents the object
            std::stringstream obj_stream;
            obj_stream << obj;
            obj_string.insert(0, obj_stream.str());
        }

        broadcast(obj_string, root);

        if (rank_ != root) { // Deserialize the object broadcasted
            std::stringstream obj_stream;
            obj_stream << obj_string;
            obj_stream >> obj;
        }
    }
#else
    template <class T> void broadcast(T& /*obj*/, const size_t& root=0) {(void)root;}
#endif

#ifdef ENABLE_MPI
    /**
     * @brief Broadcast a vector of class T objects via MPI, or do nothing in case MPI is not activated.
     * @param vec_obj A vector of class T objects that shall be broadcast,
     * or, if not root, the vector that will hold the broadcast objects
     * @param[in] root The process rank that will commit the broadcast value, all others receive only
     * @note Class T needs to have the serialize and deserialize operators \<\< and \>\> implemented
     */
    template <class T> void broadcast(std::vector<T>& vec_obj, const size_t& root = 0)
    {
        if (size_ <= 1) return;

        std::string obj_string;
        size_t vec_size;

        if (rank_ == root) { // Build the string that represents the objects
            std::stringstream objs_stream;
            for (size_t ii=0; ii<vec_obj.size(); ii++)
                objs_stream << vec_obj[ii];
            obj_string.insert(0, objs_stream.str());
            vec_size = vec_obj.size();
        }

        broadcast(vec_size, root);
        broadcast(obj_string, root);

        if (rank_ != root) { // Deserialize the objects broadcasted
            vec_obj.clear();

            std::stringstream objs_stream;
            objs_stream << obj_string;

            T tmp;
            for (size_t ii=0; ii<vec_size; ii++) {
                objs_stream >> tmp;
                vec_obj.push_back(tmp);
            }
        }
    }
#else
    template <class T> void broadcast(std::vector<T>& /*vec_obj*/, const size_t& root=0) {(void)root;}
#endif

#ifdef ENABLE_MPI
    /**
     * @brief Scatter the objects pointed to by vector<T*> in slices to all processes.
     * Internally no MPI_Scatterv or the like is used, because the size of the
     * strings may become extremely large; thus blocking send and receive calls
     * are used internally. In case MPI is not activated vec_local is not changed in any way.
     * @param[in, out] vec_local A vector of T* pointers to objects that shall be scattered;
     * if root then this vector will also hold the pointers to the scattered objects
     * @param[in] root The process rank that will scatter the values, all others receive only
     * @note Class T needs to have the serialize and deserialize operators << and >> implemented
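     *
     * Hypothetical sketch ("MyObject" stands for any class with both stream operators):
     * @code
     * std::vector<MyObject*> vec; // on the root process: pointers for the whole domain
     * MPIControl::instance().scatter(vec);
     * // every process now holds (and owns) only the pointers of its own slice
     * @endcode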
     */
    template <class T> void scatter(std::vector<T*>& vec_local, const size_t& root = 0)
    {
        if (size_ <= 1) return;

        if (rank_ == root) {
            for (size_t ii=1; ii<size_; ii++) { // HACK: Assuming root is master
                size_t startx, deltax;
                getArraySliceParams(vec_local.size(), ii, startx, deltax);

                send((int)deltax, ii);
                for (size_t jj=startx; jj<(startx+deltax); jj++) {
                    std::string obj_string;
                    std::stringstream objs_stream;

                    objs_stream << *(vec_local[jj]);
                    obj_string.insert(0, objs_stream.str());
                    send(obj_string, ii);
                    delete vec_local[jj];
                }
            }

            size_t startx, deltax;
            getArraySliceParams(vec_local.size(), startx, deltax);
            vec_local.resize(deltax);
        } else {
            std::string obj_string;
            int deltax;
            receive(deltax, root);
            vec_local.resize((size_t)deltax);

            T obj; // temporary object
            for (size_t ii=0; ii<vec_local.size(); ii++) {
                std::stringstream objs_stream;
                receive(obj_string, root);

                objs_stream << obj_string;

                objs_stream >> obj;
                vec_local[ii] = new T(obj);
            }
        }
    }
#else
    template <class T> void scatter(std::vector<T*>& /*vec_local*/, const size_t& root=0) {(void)root;}
#endif

#ifdef ENABLE_MPI
    /**
     * @brief Send the objects pointed to by vector<T*> to process \#destination
     * @param[in] vec_local A vector of T* pointers to objects that shall be sent
     * @param[in] destination The process rank that will receive the values
     * @param[in] tag Arbitrary non-negative integer assigned to uniquely identify a message
     * @note Class T needs to have the serialize and deserialize operators << and >> implemented
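     *
     * Hypothetical sketch, paired with the matching receive() below ("MyObject" is a placeholder type):
     * @code
     * MPIControl& mpicontrol = MPIControl::instance();
     * std::vector<MyObject> vec;
     * if (mpicontrol.rank() == 1) mpicontrol.send(vec, 2);    // rank 1 sends its objects to rank 2
     * if (mpicontrol.rank() == 2) mpicontrol.receive(vec, 1); // rank 2 receives them from rank 1
     * @endcode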
     */
    template <class T> void send(const std::vector<T*>& vec_local, const size_t& destination, const int& tag=0);
    template <class T> void send(const std::vector<T>& vec_local, const size_t& destination, const int& tag=0);
#else
    template <class T> void send(const std::vector<T*>& /*vec_local*/, const size_t& /*destination*/, const int& tag=0) {(void)tag;}
    template <class T> void send(const std::vector<T>& /*vec_local*/, const size_t& /*destination*/, const int& tag=0) {(void)tag;}
#endif

#ifdef ENABLE_MPI
    /**
     * @brief Receive a vector of objects from process \#source
     * @param[out] vec_local A vector of T* pointers that will hold the received objects
     * @param[in] source The process rank that will send the objects
     * @param[in] tag Arbitrary non-negative integer assigned to uniquely identify a message
     * @note Class T needs to have the serialize and deserialize operators << and >> implemented
     */
    template <class T> void receive(std::vector<T*>& vec_local, const size_t& source, const int& tag=0);
    template <class T> void receive(std::vector<T>& vec_local, const size_t& source, const int& tag=0);
#else
    template <class T> void receive(std::vector<T*>& /*vec_local*/, const size_t& /*source*/, const int& tag=0) {(void)tag;}
    template <class T> void receive(std::vector<T>& /*vec_local*/, const size_t& /*source*/, const int& tag=0) {(void)tag;}
#endif

#ifdef ENABLE_MPI
    /**
     * @brief Gathers the objects pointed to by vector<T*> from all processes into a vector<T*> on
     * the root node. Internally no MPI_Gatherv or the like is used, because the size of the
     * strings may become extremely large; thus blocking send and receive calls
     * are used internally. In case MPI is not activated vec_local is not changed in any way.
     * @param[in, out] vec_local A vector of T* pointers to objects that shall be transmitted to root;
     * if root then this vector will also hold the pointers to the gathered objects
     * @param[in] root The process rank that will gather the values, all others send only
     * @note Class T needs to have the serialize and deserialize operators \<\< and \>\> implemented
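     *
     * Hypothetical sketch ("MyObject" stands for any class with both stream operators):
     * @code
     * std::vector<MyObject*> vec; // on every process: pointers to its locally computed objects
     * MPIControl::instance().gather(vec); // on the root process vec now holds the objects of all processes
     * @endcode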
     */
    template <class T> void gather(std::vector<T*>& vec_local, const size_t& root = 0)
    {
        if (size_ <= 1) return;

        std::vector<int> vec_sizes;
        size_t sum = vec_local.size(); //will be overwritten by the reduction below

        gather(vec_local.size(), vec_sizes); //sizes of all processes, on root
        allreduce_sum(sum); //global amount of T*

        if (rank_ == root) {
            std::string obj_string;
            size_t local_sum = vec_local.size();

            vec_local.resize(sum);

            T obj; //temporary object
            for (size_t ii=1; ii<vec_sizes.size(); ii++) {//HACK: Assuming master is always rank 0
                for (size_t jj=0; jj<(size_t)vec_sizes[ii]; jj++) {
                    std::stringstream objs_stream;
                    receive(obj_string, ii);

                    objs_stream << obj_string;
                    objs_stream >> obj;
                    vec_local[local_sum+jj] = new T(obj);
                }
                local_sum += vec_sizes[ii];
            }

        } else {
            for (size_t ii=0; ii<vec_local.size(); ii++) {
                std::string obj_string;
                std::stringstream objs_stream;

                objs_stream << *(vec_local[ii]);
                obj_string.insert(0, objs_stream.str());
                send(obj_string, root);
            }
        }
    }
#else
    template <class T> void gather(std::vector<T*>& /*vec_local*/, const size_t& root=0) {(void)root;}
#endif

    /** @brief Helper to split an array for domain decomposition.
     * Given the overall size of an array, calculate a start index and block length
     * for the subarray on the basis of the current MPI rank (or of the given worker index).
     * @param[in] dimx The overall size of the array to split up
     * @param[out] startx_sub The start of the subarray for the given processor rank
     * @param[out] nx_sub The block length of the subarray for the given processor rank
     */
    void getArraySliceParams(const size_t& dimx, size_t& startx_sub, size_t& nx_sub) const {getArraySliceParams(dimx, size_, rank_, startx_sub, nx_sub);}
    void getArraySliceParams(const size_t& dimx, const size_t& idx_wk, size_t& startx_sub, size_t& nx_sub) const {getArraySliceParams(dimx, size_, idx_wk, startx_sub, nx_sub);}

    void getArraySliceParamsOptim(const size_t& dimx, size_t& startx_sub, size_t& nx_sub, const mio::DEMObject& dem, const mio::Grid2DObject& landuse)
                 {getArraySliceParamsOptim(dimx, rank_, startx_sub, nx_sub, dem, landuse);}

    void getArraySliceParamsOptim(const size_t& dimx, const size_t& idx_wk, size_t& startx_sub, size_t& nx_sub, const mio::DEMObject& dem, const mio::Grid2DObject& landuse);

    /**
     * @brief Returns the parameters for splitting an array into several balanced sub-arrays.
     * This is mostly useful for parallel calculations, where an array will be split and sent to different
     * workers.
     * @param[in] dimx number of cells in the desired dimension
     * @param[in] nbworkers total number of slices
     * @param[in] idx_wk current slice index (starting at 0)
     * @param[out] startx_sub calculated start index for the current slice
     * @param[out] nx_sub calculated number of cells (in the desired dimension) of the current slice
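     *
     * Illustrative call (splitting, for example, 400 grid columns among 4 workers into balanced slices):
     * @code
     * size_t startx, nx;
     * MPIControl::getArraySliceParams(400, 4, 2, startx, nx); // slice of worker #2, here startx=200 and nx=100
     * @endcode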
     */
    static void getArraySliceParams(const size_t& dimx, const size_t& nbworkers, const size_t& idx_wk, size_t& startx_sub, size_t& nx_sub);

    /**
     * @brief Returns the parameters for splitting an array into several balanced sub-arrays.
     * This is mostly useful for parallel calculations, where an array will be split and sent to different
     * workers.
     * @param[in] dimx number of cells in the desired dimension
     * @param[in] nbworkers total number of slices
     * @param[out] offset calculated start index for each slice
     * @param[out] nx calculated number of cells (in the desired dimension) of each slice
     */
    static void getArraySliceParams(const size_t& dimx, const size_t& nbworkers, std::vector<size_t>& offset, std::vector<size_t>& nx);

private:
    MPIControl();
    ~MPIControl();

    MPIControl(MPIControl const&); //No implementation allowed - singleton class
    void operator=(MPIControl const&); //No implementation allowed - singleton class
    void broadcast(std::string& message, const size_t& root);

    void send(std::string& message, const size_t& recipient, const int& tag=0);
    void receive(std::string& message, const size_t& source, const int& tag=0);
    void send(const int& value, const size_t& recipient, const int& tag=0);
    void receive(int& value, const size_t& source, const int& tag=0);

    static void checkSuccess(const int& ierr);

    size_t rank_;      // the rank of this process
    size_t size_;      // the number of all processes
    std::string name_; // the name of the node this process is running on
};

#endif
