30 #ifdef KRATOS_SMP_OPENMP
// Expands to a scoped std::lock_guard (named scope_lock) on the lock returned
// by ParallelUtilities::GetGlobalLock(). The lock is held until the end of the
// enclosing scope. The expansion already ends with ';' and declares a local
// variable, so it can be used at most once per scope.
39 #define KRATOS_CRITICAL_SECTION const std::lock_guard scope_lock(ParallelUtilities::GetGlobalLock());
// Declares the std::stringstream `err_stream` that KRATOS_CATCH_THREAD_EXCEPTION
// appends to and KRATOS_CHECK_AND_THROW_THREAD_EXCEPTION reads from. It must be
// expanded in a scope that encloses both of those macros.
41 #define KRATOS_PREPARE_CATCH_THREAD_EXCEPTION std::stringstream err_stream;
// Catch-side half of the per-thread exception capture. The expansion begins
// with '}' and therefore closes a `try {` block opened by the code that uses
// it. Each handler takes the global lock (KRATOS_CRITICAL_SECTION) and appends
// a message containing the index `i` to `err_stream`; both names must be
// visible at the point of expansion.
// NOTE(review): in this listing no handler line precedes the
// "caught unknown exception" branch (presumably `} catch(...) { \`), and the
// last continuation line has no closing line after it - confirm against the
// original header.
43 #define KRATOS_CATCH_THREAD_EXCEPTION \
44 } catch(Exception& e) { \
45 KRATOS_CRITICAL_SECTION \
46 err_stream << "Thread #" << i << " caught exception: " << e.what(); \
47 } catch(std::exception& e) { \
48 KRATOS_CRITICAL_SECTION \
49 err_stream << "Thread #" << i << " caught exception: " << e.what(); \
51 KRATOS_CRITICAL_SECTION \
52 err_stream << "Thread #" << i << " caught unknown exception:"; \
// Final step of the per-thread exception capture: reads the text accumulated
// in `err_stream` (declared by KRATOS_PREPARE_CATCH_THREAD_EXCEPTION) and, if
// it is not empty, streams it into KRATOS_ERROR_IF_NOT. That macro is defined
// outside this header and is expected to raise when its condition is false.
// Binding the temporary returned by str() to a const reference extends its
// lifetime to the end of the enclosing scope.
#define KRATOS_CHECK_AND_THROW_THREAD_EXCEPTION \
    const std::string& err_msg = err_stream.str(); \
    KRATOS_ERROR_IF_NOT(err_msg.empty()) << "The following errors occurred in a parallel region!\n" << err_msg << std::endl;
// Returns the current number of threads.
80 [[nodiscard]]
static int GetNumThreads();
// NOTE(review): presumably sets the thread count later reported by
// GetNumThreads(); the implementation is not visible here - confirm.
85 static void SetNumThreads(
const int NumThreads);
// NOTE(review): presumably the number of processors available to the
// process; the implementation is not visible here - confirm.
91 [[nodiscard]]
static int GetNumProcs();
// Returns a reference to the LockObject that KRATOS_CRITICAL_SECTION locks.
99 [[nodiscard]]
static LockObject& GetGlobalLock();
// Static pointer to an int. NOTE(review): from the name this looks like the
// stored thread count - confirm against the .cpp file.
109 static int* mspNumThreads;
// NOTE(review): presumably computes the initial thread count - confirm.
121 static int InitializeNumberOfThreads();
// Returns a mutable reference to an int. NOTE(review): presumably the stored
// thread count behind GetNumThreads()/SetNumThreads() - confirm.
127 static int& GetNumberOfThreads();
// BlockPartition: splits the iterator range [it_begin, it_end) into mNchunks
// consecutive blocks. mBlockPartition[k] holds the first iterator of block k
// and mBlockPartition[mNchunks] holds it_end.
// The class header, the constructor signature and several brace lines are not
// present in this listing; the listing's index gives the signature as
// BlockPartition(TIterator it_begin, TIterator it_end,
//                int Nchunks=ParallelUtilities::GetNumThreads()).
140 template<
class TIterator,
int MaxThreads=Globals::MaxAllowedThreads>
// Compile-time requirement: TIterator must be a random access iterator
// (iterator subtraction and `iterator + n` are used below).
153 std::is_same_v<
typename std::iterator_traits<TIterator>::iterator_category, std::random_access_iterator_tag>,
154 "BlockPartition requires random access iterators to divide the input range into partitions"
156 KRATOS_ERROR_IF(Nchunks < 1) <<
"Number of chunks must be > 0 (and not " << Nchunks <<
")" << std::endl;
158 const std::ptrdiff_t size_container = it_end-it_begin;
160 if (size_container == 0) {
// Never use more chunks than there are elements.
// NOTE(review): static_cast<int> narrows a std::ptrdiff_t; for ranges longer
// than INT_MAX the cast does not yield the true size - confirm whether such
// ranges can reach this code.
164 mNchunks =
std::min(
static_cast<int>(size_container), Nchunks);
// Every block except the last gets size_container / mNchunks elements; the
// last block ends at it_end and therefore absorbs the remainder.
166 const std::ptrdiff_t block_partition_size = size_container / mNchunks;
167 mBlockPartition[0] = it_begin;
// NOTE(review): index mNchunks is written while the member array is declared
// as std::array<TIterator, MaxThreads>, so this requires mNchunks < MaxThreads;
// no such check is visible in this listing - confirm.
168 mBlockPartition[mNchunks] = it_end;
169 for (
int i=1;
i<mNchunks;
i++) {
170 mBlockPartition[
i] = mBlockPartition[
i-1] + block_partition_size;
// for_each(TUnaryFunction&& f): simple iteration loop, f is called on every
// entry (description and signature taken from the listing's index; the
// signature line and the call to f are not present in this listing).
// The chunk loop is an OpenMP `parallel for`; iteration i walks the iterators
// of block i, from mBlockPartition[i] up to (not including) mBlockPartition[i+1].
177 template <
class TUnaryFunction>
182 #pragma omp parallel for
183 for (
int i=0;
i<mNchunks; ++
i) {
185 for (
auto it = mBlockPartition[
i]; it != mBlockPartition[
i+1]; ++it) {
// Reducing loop: f is called on every entry and its return value is folded
// into a reducer.
// TReducer must be default constructible and provide return_type,
// LocalReduce(value), ThreadSafeReduce(other) and GetValue().
// Returns global_reducer.GetValue() after all chunks have been merged.
199 template <
class TReducer,
class TUnaryFunction>
200 [[nodiscard]]
inline typename TReducer::return_type
for_each(TUnaryFunction &&
f)
204 TReducer global_reducer;
205 #pragma omp parallel for
206 for (
int i=0;
i<mNchunks; ++
i) {
// One reducer per chunk: items are accumulated locally and merged into the
// shared reducer once per chunk via ThreadSafeReduce.
208 TReducer local_reducer;
209 for (
auto it = mBlockPartition[
i]; it != mBlockPartition[
i+1]; ++it) {
210 local_reducer.LocalReduce(
f(*it));
212 global_reducer.ThreadSafeReduce(local_reducer);
218 return global_reducer.GetValue();
// Loop with thread local storage (TLS): f is called as f(item, storage) on
// every entry, where `storage` is a copy of rThreadLocalStoragePrototype.
// TThreadLocalStorage must be copy constructible (enforced by static_assert).
// NOTE(review): the directives that would make the copy per-thread and
// parallelise the chunk loop are not present in this listing - confirm
// against the original header.
225 template <
class TThreadLocalStorage,
class TFunction>
226 inline void for_each(
const TThreadLocalStorage& rThreadLocalStoragePrototype, TFunction &&
f)
228 static_assert(std::is_copy_constructible<TThreadLocalStorage>::value,
"TThreadLocalStorage must be copy constructible!");
// Working copy of the prototype handed to f; the prototype itself is const.
235 TThreadLocalStorage thread_local_storage(rThreadLocalStoragePrototype);
238 for(
int i=0;
i<mNchunks; ++
i){
240 for (
auto it = mBlockPartition[
i]; it != mBlockPartition[
i+1]; ++it){
241 f(*it, thread_local_storage);
// Loop with thread local storage (TLS) and reduction: f is called as
// f(item, storage) on every entry and its return value is folded into a
// TReducer. `storage` is a copy of rThreadLocalStoragePrototype, so
// TThreadLocalStorage must be copy constructible (enforced by static_assert).
// Returns global_reducer.GetValue().
// NOTE(review): the directives that would make the storage copy per-thread
// are not present in this listing - confirm against the original header.
255 template <
class TReducer,
class TThreadLocalStorage,
class TFunction>
256 [[nodiscard]]
inline typename TReducer::return_type
for_each(
const TThreadLocalStorage& rThreadLocalStoragePrototype, TFunction &&
f)
258 static_assert(std::is_copy_constructible<TThreadLocalStorage>::value,
"TThreadLocalStorage must be copy constructible!");
262 TReducer global_reducer;
267 TThreadLocalStorage thread_local_storage(rThreadLocalStoragePrototype);
270 for (
int i=0;
i<mNchunks; ++
i) {
// Per-chunk reducer, merged into the shared one once the chunk is done.
272 TReducer local_reducer;
273 for (
auto it = mBlockPartition[
i]; it != mBlockPartition[
i+1]; ++it) {
274 local_reducer.LocalReduce(
f(*it, thread_local_storage));
276 global_reducer.ThreadSafeReduce(local_reducer);
281 return global_reducer.GetValue();
// Block boundaries: entry k is the first iterator of block k and entry
// mNchunks is the end of the range.
// NOTE(review): the constructor writes index mNchunks, so with MaxThreads
// slots the number of chunks has to stay below MaxThreads - confirm that
// callers guarantee this.
286 std::array<TIterator, MaxThreads> mBlockPartition;
// Template header of block_for_each(TIterator itBegin, TIterator itEnd,
// TFunction&& rFunction): execute a functor on all items of a range in
// parallel (signature and description taken from the listing's index; the
// signature and body are not present in this listing).
// The enable_if restricts the overload to types whose iterator_category is,
// or derives from, std::input_iterator_tag.
296 template <
class TIterator,
298 std::enable_if_t<std::is_base_of_v<std::input_iterator_tag, typename std::iterator_traits<TIterator>::iterator_category>,
bool> =
true>
312 template <
class TReduction,
315 std::enable_if_t<std::is_base_of_v<std::input_iterator_tag, typename std::iterator_traits<TIterator>::iterator_category>,
bool> =
true>
316 [[nodiscard]]
typename TReduction::return_type
block_for_each(TIterator itBegin, TIterator itEnd, TFunction&& rFunction)
318 return BlockPartition<TIterator>(itBegin, itEnd).template for_each<TReduction>(std::forward<TFunction>(std::forward<TFunction>(rFunction)));
// block_for_each over an iterator range with thread local storage: takes the
// range [itBegin, itEnd), a TLS prototype rTLS (by const reference) and a
// functor. The body and two template parameter lines are not present in this
// listing.
// The enable_if restricts the overload to types whose iterator_category is,
// or derives from, std::input_iterator_tag.
330 template <
class TIterator,
333 std::enable_if_t<std::is_base_of_v<std::input_iterator_tag, typename std::iterator_traits<TIterator>::iterator_category>,
bool> =
true>
334 void block_for_each(TIterator itBegin, TIterator itEnd,
const TTLS& rTLS, TFunction &&rFunction)
349 template <
class TReduction,
353 std::enable_if_t<std::is_base_of_v<std::input_iterator_tag, typename std::iterator_traits<TIterator>::iterator_category>,
bool> =
true>
354 [[nodiscard]]
typename TReduction::return_type
block_for_each(TIterator itBegin, TIterator itEnd,
const TTLS& tls, TFunction&& rFunction)
356 return BlockPartition<TIterator>(itBegin, itEnd).template for_each<TReduction>(tls, std::forward<TFunction>(std::forward<TFunction>(rFunction)));
// Template header fragment of a container-based block_for_each overload.
// The enable_if condition inspects the type returned by TContainerType's
// begin(); the remainder of the condition, the signature and the body are not
// present in this listing.
365 template <
class TContainerType,
367 std::enable_if_t<!std::is_same_v<
368 std::iterator_traits<
typename decltype(std::declval<std::remove_cv_t<TContainerType>>().begin())::value_type>,
// Container-based block_for_each with reduction: forwards to the iterator
// overload block_for_each<TReducer>(v.begin(), v.end(), func), perfectly
// forwarding the functor.
// The enable_if condition inspects the type returned by TContainerType's
// begin(); the rest of the condition and the signature line are not present
// in this listing.
384 template <
class TReducer,
385 class TContainerType,
387 std::enable_if_t<!std::is_same_v<
388 std::iterator_traits<
typename decltype(std::declval<std::remove_cv_t<TContainerType>>().begin())::value_type>,
394 return block_for_each<TReducer>(
v.begin(),
v.end(), std::forward<TFunctionType>(
func));
// Template header fragment of a container-based block_for_each overload that
// also takes a TThreadLocalStorage type. The rest of the enable_if condition,
// the signature and the body are not present in this listing.
405 template <
class TContainerType,
406 class TThreadLocalStorage,
408 std::enable_if_t<!std::is_same_v<
409 std::iterator_traits<
typename decltype(std::declval<std::remove_cv_t<TContainerType>>().begin())::value_type>,
// Container-based block_for_each with thread local storage and reduction.
// Forwards to the iterator overload
// block_for_each<TReducer>(v.begin(), v.end(), tls, func), perfectly
// forwarding the functor, and returns that call's result.
// v    : container providing begin()/end()
// tls  : thread local storage prototype, passed through by const reference
// func : functor passed through to the iterator overload
427 template <
class TReducer,
428 class TContainerType,
429 class TThreadLocalStorage,
431 std::enable_if_t<!std::is_same_v<
432 std::iterator_traits<
typename decltype(std::declval<std::remove_cv_t<TContainerType>>().begin())::value_type>,
436 [[nodiscard]] typename TReducer::return_type
block_for_each(TContainerType &&
v,
const TThreadLocalStorage& tls, TFunctionType &&
func)
438 return block_for_each<TReducer>(
v.begin(),
v.end(), tls, std::forward<TFunctionType>(
func));
// IndexPartition: this class is useful for index iteration over containers.
// The constructor splits the index range [0, Size) into mNchunks consecutive
// blocks: mBlockPartition[k] is the first index of block k and
// mBlockPartition[mNchunks] is Size.
// The class header, the constructor signature and the statements that set
// mNchunks are not present in this listing; the listing's index gives the
// signature as
// IndexPartition(TIndexType Size, int Nchunks=ParallelUtilities::GetNumThreads()).
449 template<
class TIndexType=std::
size_t,
int TMaxThreads=Globals::MaxAllowedThreads>
461 KRATOS_ERROR_IF(Nchunks < 1) <<
"Number of chunks must be > 0 (and not " << Nchunks <<
")" << std::endl;
// Every block except the last gets Size / mNchunks indices; the last block
// ends at Size and therefore absorbs the remainder.
// NOTE(review): the quotient is stored in an int while TIndexType defaults to
// std::size_t, so a quotient larger than INT_MAX would be narrowed - confirm
// whether such sizes can reach this code.
470 const int block_partition_size =
Size / mNchunks;
471 mBlockPartition[0] = 0;
// NOTE(review): index mNchunks is written while the member array is declared
// as std::array<TIndexType, TMaxThreads>, so this requires
// mNchunks < TMaxThreads; no such check is visible in this listing - confirm.
472 mBlockPartition[mNchunks] =
Size;
473 for (
int i=1;
i<mNchunks;
i++) {
474 mBlockPartition[
i] = mBlockPartition[
i-1] + block_partition_size;
// for_pure_c11(TUnaryFunction&& f): variant of the index loop built on
// std::async instead of OpenMP (signature taken from the listing's index; the
// signature line, the call to f and the try/future-wait lines are not present
// in this listing).
// One std::async task is launched per chunk with std::launch::async; task i
// iterates k over [partition[i], partition[i+1]).
481 template <
class TUnaryFunction>
484 std::vector< std::future<void> > runners(mNchunks);
485 const auto& partition = mBlockPartition;
486 for (
int i=0;
i<mNchunks; ++
i) {
// The lambda captures the partition array and f by reference and the chunk
// index by value, so each task sees its own i.
487 runners[
i] = std::async(std::launch::async, [&partition,
i, &
f]()
489 for (
auto k = partition[
i];
k < partition[
i+1]; ++
k) {
// Second pass over the chunks: the handlers below turn an exception caught
// for chunk i into a KRATOS_ERROR that names the chunk. The last handler
// reports "unknown error" without the chunk number.
496 for(
int i=0;
i<mNchunks; ++
i) {
501 KRATOS_ERROR << std::endl <<
"THREAD number: " <<
i <<
" caught exception " <<
e.what() << std::endl;
502 }
catch(std::exception&
e) {
503 KRATOS_ERROR << std::endl <<
"THREAD number: " <<
i <<
" caught exception " <<
e.what() << std::endl;
505 KRATOS_ERROR << std::endl <<
"unknown error" << std::endl;
// for_each(TUnaryFunction&& f): index loop (signature taken from the
// listing's index; the signature line and the call to f are not present in
// this listing).
// The chunk loop is an OpenMP `parallel for`; iteration i walks the indices k
// of block i, from mBlockPartition[i] up to (not including) mBlockPartition[i+1].
513 template <
class TUnaryFunction>
518 #pragma omp parallel for
519 for (
int i=0;
i<mNchunks; ++
i) {
521 for (
auto k = mBlockPartition[
i];
k < mBlockPartition[
i+1]; ++
k) {
// Reducing index loop: f is called with every index k of the partition and
// its return value is folded into a reducer.
// TReducer must be default constructible and provide return_type,
// LocalReduce(value), ThreadSafeReduce(other) and GetValue().
// Returns global_reducer.GetValue() after all chunks have been merged.
534 template <
class TReducer,
class TUnaryFunction>
535 [[nodiscard]]
inline typename TReducer::return_type
for_each(TUnaryFunction &&
f)
539 TReducer global_reducer;
540 #pragma omp parallel for
541 for (
int i=0;
i<mNchunks; ++
i) {
// One reducer per chunk: values are accumulated locally and merged into the
// shared reducer once per chunk via ThreadSafeReduce.
543 TReducer local_reducer;
544 for (
auto k = mBlockPartition[
i];
k < mBlockPartition[
i+1]; ++
k) {
545 local_reducer.LocalReduce(
f(
k));
547 global_reducer.ThreadSafeReduce(local_reducer);
551 return global_reducer.GetValue();
// Index loop with thread local storage (TLS): f is called as f(k, storage)
// for every index k, where `storage` is a copy of rThreadLocalStoragePrototype.
// TThreadLocalStorage must be copy constructible (enforced by static_assert).
// NOTE(review): the directives that would make the copy per-thread and
// parallelise the chunk loop are not present in this listing - confirm
// against the original header.
559 template <
class TThreadLocalStorage,
class TFunction>
560 inline void for_each(
const TThreadLocalStorage& rThreadLocalStoragePrototype, TFunction &&
f)
562 static_assert(std::is_copy_constructible<TThreadLocalStorage>::value,
"TThreadLocalStorage must be copy constructible!");
// Working copy of the prototype handed to f; the prototype itself is const.
569 TThreadLocalStorage thread_local_storage(rThreadLocalStoragePrototype);
572 for (
int i=0;
i<mNchunks; ++
i) {
574 for (
auto k = mBlockPartition[
i];
k < mBlockPartition[
i+1]; ++
k) {
575 f(
k, thread_local_storage);
// Index loop with thread local storage (TLS) and reduction: f is called as
// f(k, storage) for every index k and its return value is folded into a
// TReducer. `storage` is a copy of rThreadLocalStoragePrototype, so
// TThreadLocalStorage must be copy constructible (enforced by static_assert).
// Returns global_reducer.GetValue().
// NOTE(review): the directives that would make the storage copy per-thread
// are not present in this listing - confirm against the original header.
589 template <
class TReducer,
class TThreadLocalStorage,
class TFunction>
590 [[nodiscard]]
inline typename TReducer::return_type
for_each(
const TThreadLocalStorage& rThreadLocalStoragePrototype, TFunction &&
f)
592 static_assert(std::is_copy_constructible<TThreadLocalStorage>::value,
"TThreadLocalStorage must be copy constructible!");
596 TReducer global_reducer;
601 TThreadLocalStorage thread_local_storage(rThreadLocalStoragePrototype);
604 for (
int i=0;
i<mNchunks; ++
i) {
// Per-chunk reducer, merged into the shared one once the chunk is done.
606 TReducer local_reducer;
607 for (
auto k = mBlockPartition[
i];
k < mBlockPartition[
i+1]; ++
k) {
608 local_reducer.LocalReduce(
f(
k, thread_local_storage));
610 global_reducer.ThreadSafeReduce(local_reducer);
616 return global_reducer.GetValue();
// Block boundaries: entry k is the first index of block k and entry mNchunks
// is the total size.
// NOTE(review): the constructor writes index mNchunks, so with TMaxThreads
// slots the number of chunks has to stay below TMaxThreads - confirm that
// callers guarantee this.
621 std::array<TIndexType, TMaxThreads> mBlockPartition;
// Remove the three exception-capture helper macros so they are not visible to
// code that follows this point. KRATOS_CRITICAL_SECTION is not undefined here.
626 #undef KRATOS_PREPARE_CATCH_THREAD_EXCEPTION
627 #undef KRATOS_CATCH_THREAD_EXCEPTION
628 #undef KRATOS_CHECK_AND_THROW_THREAD_EXCEPTION
Definition: parallel_utilities.h:142
TReducer::return_type for_each(TUnaryFunction &&f)
loop allowing reductions. f called on every entry in rData the function f needs to return the values ...
Definition: parallel_utilities.h:200
TReducer::return_type for_each(const TThreadLocalStorage &rThreadLocalStoragePrototype, TFunction &&f)
loop with thread local storage (TLS) allowing reductions. f called on every entry in rData the functi...
Definition: parallel_utilities.h:256
BlockPartition(TIterator it_begin, TIterator it_end, int Nchunks=ParallelUtilities::GetNumThreads())
Definition: parallel_utilities.h:148
void for_each(const TThreadLocalStorage &rThreadLocalStoragePrototype, TFunction &&f)
loop with thread local storage (TLS). f called on every entry in rData
Definition: parallel_utilities.h:226
void for_each(TUnaryFunction &&f)
simple iteration loop. f called on every entry in rData
Definition: parallel_utilities.h:178
Extends the std::exception class with more information about error location.
Definition: exception.h:49
This class is useful for index iteration over containers.
Definition: parallel_utilities.h:451
TReducer::return_type for_each(TUnaryFunction &&f)
Definition: parallel_utilities.h:535
TReducer::return_type for_each(const TThreadLocalStorage &rThreadLocalStoragePrototype, TFunction &&f)
Definition: parallel_utilities.h:590
IndexPartition(TIndexType Size, int Nchunks=ParallelUtilities::GetNumThreads())
constructor using the size of the partition to be used
Definition: parallel_utilities.h:458
void for_each(const TThreadLocalStorage &rThreadLocalStoragePrototype, TFunction &&f)
loop with thread local storage (TLS). f called on every entry in rData
Definition: parallel_utilities.h:560
void for_pure_c11(TUnaryFunction &&f)
Definition: parallel_utilities.h:482
void for_each(TUnaryFunction &&f)
Definition: parallel_utilities.h:514
This class defines and stores a lock and gives an interface to it.
Definition: lock_object.h:42
Shared memory parallelism related helper class.
Definition: parallel_utilities.h:68
static int GetNumThreads()
Returns the current number of threads.
Definition: parallel_utilities.cpp:34
#define KRATOS_TRY
Definition: define.h:109
#define KRATOS_ERROR
Definition: exception.h:161
#define KRATOS_ERROR_IF(conditional)
Definition: exception.h:162
static double min(double a, double b)
Definition: GeometryFunctions.h:71
TSpaceType::IndexType Size(TSpaceType &dummy, typename TSpaceType::VectorType const &rV)
Definition: add_strategies_to_python.cpp:111
REF: G. R. Cowper, GAUSSIAN QUADRATURE FORMULAS FOR TRIANGLES.
Definition: mesh_condition.cpp:21
REACTION_CHECK_STIFFNESS_FACTOR INNER_LOOP_ITERATION DISTANCE_THRESHOLD ACTIVE_CHECK_FACTOR AUXILIAR_COORDINATES NORMAL_GAP WEIGHTED_GAP WEIGHTED_SCALAR_RESIDUAL bool
Definition: contact_structural_mechanics_application_variables.h:93
void block_for_each(TIterator itBegin, TIterator itEnd, TFunction &&rFunction)
Execute a functor on all items of a range in parallel.
Definition: parallel_utilities.h:299
def func(args)
Definition: fde_solve.py:101
v
Definition: generate_convection_diffusion_explicit_element.py:114
f
Definition: generate_convection_diffusion_explicit_element.py:112
tuple const
Definition: ode_solve.py:403
int k
Definition: quadrature.py:595
integer i
Definition: TensorModule.f:17
e
Definition: run_cpp_mpi_tests.py:31
#define KRATOS_CATCH_THREAD_EXCEPTION
Definition: parallel_utilities.h:43
#define KRATOS_CHECK_AND_THROW_THREAD_EXCEPTION
Definition: parallel_utilities.h:55
#define KRATOS_PREPARE_CATCH_THREAD_EXCEPTION
Definition: parallel_utilities.h:41