diff --git a/cmake/modules/FindTBB.cmake b/cmake/modules/FindTBB.cmake index a95f0e4d37a59afa85910d00cb650678b1707110..be1402d866dbeba115c2cac534439858be731d41 100644 --- a/cmake/modules/FindTBB.cmake +++ b/cmake/modules/FindTBB.cmake @@ -307,6 +307,8 @@ endif() # set variable for config.h set(HAVE_TBB ${TBB_FOUND}) +# provide include_sys_dir +include(XtCompilerSupport) # perform DUNE-specific setup tasks if(TBB_FOUND) set(TBB_CACHE_ALIGNED_ALLOCATOR_ALIGNMENT 128) @@ -320,6 +322,9 @@ if(TBB_FOUND) ${TBB_INCLUDE_DIRS} LIBRARIES ${TBB_LIBRARIES}) + foreach(_idir ${TBB_INCLUDE_DIRS}) + include_sys_dir(${_idir}) + endforeach() endif() # function for adding TBB flags to a list of targets diff --git a/cmake/modules/XtCompilerSupport.cmake b/cmake/modules/XtCompilerSupport.cmake index ec509c42fa0a2f076cc2c4907f39335a007a4292..8991f937255e42cdef409a726e24211fa9ba20d1 100644 --- a/cmake/modules/XtCompilerSupport.cmake +++ b/cmake/modules/XtCompilerSupport.cmake @@ -30,7 +30,7 @@ macro(INCLUDE_SYS_DIR) if(${ARG} MATCHES "/usr/include") message(AUTHOR_WARNING "-isystem not supported for ${ARG}") else() - add_definitions("-isystem ${_idir}") + add_definitions("-isystem ${ARG}") endif() else(IS_DIRECTORY ${ARG}) message(STATUS "Include directory ${ARG} does not exist") diff --git a/dune/xt/common/disable_warnings.hh b/dune/xt/common/disable_warnings.hh index 19145378e8caa5a9fca741f7836cae3c3f23d52c..ac0bc850dd588ef023d374f5813753af07ef4df8 100644 --- a/dune/xt/common/disable_warnings.hh +++ b/dune/xt/common/disable_warnings.hh @@ -23,7 +23,6 @@ #pragma GCC diagnostic ignored "-Wfloat-equal" #pragma GCC diagnostic ignored "-Wignored-qualifiers" #pragma GCC diagnostic ignored "-Wlogical-not-parentheses" -#pragma GCC diagnostic ignored "-Wlogical-op" #pragma GCC diagnostic ignored "-Wlogical-op-parentheses" #pragma GCC diagnostic ignored "-Wmismatched-tags" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" diff --git a/dune/xt/common/lpsolve.cc 
b/dune/xt/common/lpsolve.cc index 5cbd7f5060254aeced08d2994aedad0bcd1993b5..64748b52dcdcdae118cce4cea741da9eb71a156b 100644 --- a/dune/xt/common/lpsolve.cc +++ b/dune/xt/common/lpsolve.cc @@ -37,6 +37,11 @@ namespace lp_solve { #if HAVE_LPSOLVE +LinearProgram::LinearProgram() + : lp_(nullptr) +{ +} + LinearProgram::LinearProgram(int rows, int cols) : lp_(::make_lp(rows, cols)) { @@ -55,6 +60,11 @@ lprec* LinearProgram::data() } #else // HAVE_LPSOLVE +LinearProgram::LinearProgram() +{ + DUNE_THROW(Exceptions::dependency_missing, "You are missing lp_solve, check available() first!"); +} + LinearProgram::LinearProgram(int /*rows*/, int /*cols*/) { DUNE_THROW(Exceptions::dependency_missing, "You are missing lp_solve, check available() first!"); diff --git a/dune/xt/common/lpsolve.hh b/dune/xt/common/lpsolve.hh index b81b593132b28d248b3506bb164ae57e7033173f..3879018a06e5386b8c450899dff2dc691bc1d8a5 100644 --- a/dune/xt/common/lpsolve.hh +++ b/dune/xt/common/lpsolve.hh @@ -28,6 +28,7 @@ namespace lp_solve { struct LinearProgram { + LinearProgram(); LinearProgram(int rows, int cols); ~LinearProgram(); diff --git a/dune/xt/common/parallel/threadstorage.hh b/dune/xt/common/parallel/threadstorage.hh index 1dca7cc97f95dd84f0b29231f9d9efb543fe93b3..f3d733285c27c35fe1b5ef045d27648ffd906c71 100644 --- a/dune/xt/common/parallel/threadstorage.hh +++ b/dune/xt/common/parallel/threadstorage.hh @@ -11,57 +11,300 @@ #ifndef DUNE_XT_COMMON_PARALLEL_THREADSTORAGE_HH #define DUNE_XT_COMMON_PARALLEL_THREADSTORAGE_HH -#include <deque> +#if HAVE_TBB +// Hack to fix compilation with clang as tbb does not detect C++11 feature correctly for clang. Recent versions of TBB +// allow to set the macro TBB_USE_GLIBCXX_VERSION to the proper version of libstdc++ to fix this issue, see +// https://www.threadingbuildingblocks.org/docs/help/reference/appendices/known_issues/linux_os.html. For older versions +// we need the hack below. 
+#include <tbb/tbb_config.h> +#undef __TBB_CPP11_RVALUE_REF_PRESENT +#undef __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT +#undef __TBB_CPP11_DECLTYPE_PRESENT +#undef __TBB_CPP11_LAMBDAS_PRESENT +#define __TBB_CPP11_RVALUE_REF_PRESENT 1 +#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 1 +#define __TBB_CPP11_DECLTYPE_PRESENT 1 +#define __TBB_CPP11_LAMBDAS_PRESENT 1 +#include <tbb/enumerable_thread_specific.h> +#endif + #include <algorithm> +#include <list> #include <numeric> #include <type_traits> -#include <numeric> -#include <list> -#include <functional> +// TODO: the following includes can be removed when UnsafePerThreadValue is removed +#include <deque> +#include <memory> #include <boost/noncopyable.hpp> - -#include <dune/xt/common/type_traits.hh> -#include <dune/xt/common/memory.hh> #include <dune/xt/common/parallel/threadmanager.hh> namespace Dune { namespace XT { namespace Common { +namespace internal { + + +#if HAVE_TBB + +template <class ValueImp> +class EnumerableThreadSpecificWrapper +{ + // enumerable_thread_specific does not compile with ConstValueType as template param + using BackendType = typename tbb::enumerable_thread_specific<std::remove_const_t<ValueImp>>; + +public: + using ValueType = ValueImp; + using ConstValueType = std::add_const_t<ValueType>; + using iterator = typename BackendType::iterator; + using const_iterator = typename BackendType::const_iterator; + + template <class... InitTypes> + explicit EnumerableThreadSpecificWrapper(InitTypes&&... ctor_args) + : values_(std::forward<InitTypes>(ctor_args)...) 
+ { + } + + ValueType& local() + { + return values_.local(); + } + + // tbb does not provide a const version of local (as elements may be inserted when a new thread accesses values_), so + // values_ has to be mutable + const ValueType& local() const + { + return values_.local(); + } + + typename BackendType::iterator begin() + { + return values_.begin(); + } + + typename BackendType::iterator end() + { + return values_.end(); + } + + typename BackendType::const_iterator begin() const + { + return values_.begin(); + } + + typename BackendType::const_iterator end() const + { + return values_.end(); + } + + template <class BinaryOperation> + ValueType combine(BinaryOperation op) const + { + return values_.combine(op); + } + +private: + mutable BackendType values_; +}; // class EnumerableThreadSpecificWrapper<ValueImp> + +#else // HAVE_TBB + +template <class ValueImp> +class EnumerableThreadSpecificWrapper +{ + using BackendType = std::array<std::remove_const_t<ValueImp>, 1>; + +public: + using ValueType = ValueImp; + using ConstValueType = std::add_const_t<ValueType>; + using iterator = typename BackendType::iterator; + using const_iterator = typename BackendType::const_iterator; + + //! Initialization by copy construction of ValueType + explicit EnumerableThreadSpecificWrapper(ConstValueType& value) + : values_{value} + { + } + + //! Initialization by in-place construction ValueType with \param ctor_args + template <class... InitTypes> + explicit EnumerableThreadSpecificWrapper(InitTypes&&... 
ctor_args) + : values_{ValueType(std::forward<InitTypes>(ctor_args)...)} + { + } + + ValueType& local() + { + return values_[0]; + } + + const ValueType& local() const + { + return values_[0]; + } + + iterator begin() + { + return values_.begin(); + } + + iterator end() + { + return values_.end(); + } + + const_iterator begin() const + { + return values_.begin(); + } + + const_iterator end() const + { + return values_.end(); + } + + template <class BinaryOperation> + ValueType combine(BinaryOperation /*op*/) const + { + return values_[0]; + } + +private: + BackendType values_; +}; // class EnumerableThreadSpecificWrapper<ValueImp> + +#endif // HAVE_TBB + + +} // namespace internal + /** Automatic Storage of non-static, N thread-local values **/ template <class ValueImp> -class PerThreadValue : public boost::noncopyable +class PerThreadValue +{ + using ContainerType = internal::EnumerableThreadSpecificWrapper<ValueImp>; + +public: + using ValueType = typename ContainerType::ValueType; + using ConstValueType = typename ContainerType::ConstValueType; + + //! Initialization by copy construction of ValueType + explicit PerThreadValue(ConstValueType& value) + : values_(value) + { + } + + //! Initialization by in-place construction ValueType with \param ctor_args + template <class... InitTypes> + explicit PerThreadValue(InitTypes&&... ctor_args) + : values_(std::forward<InitTypes>(ctor_args)...) 
+ { + } + + operator ValueType() const + { + return values_.local(); + } + + ValueType& operator*() + { + return values_.local(); + } + + ConstValueType& operator*() const + { + return values_.local(); + } + + ValueType* operator->() + { + return &values_.local(); + } + + ConstValueType* operator->() const + { + return &values_.local(); + } + + template <class BinaryOperation> + ValueType accumulate(ValueType init, BinaryOperation op) const + { + return op(init, values_.combine(op)); + } + + ValueType sum() const + { + return accumulate(ValueType(0), std::plus<ValueType>()); + } + + typename ContainerType::iterator begin() + { + return values_.begin(); + } + typename ContainerType::iterator end() + { + return values_.end(); + } + + typename ContainerType::const_iterator begin() const + { + return values_.begin(); + } + typename ContainerType::const_iterator end() const + { + return values_.end(); + } + +private: + ContainerType values_; +}; // class PerThreadValue<ValueImp> + + +/** + * Previous implementation of PerThreadValue. This implementation suffers from the fact that it is not possible (or + * at least we did not find a way yet) to set a hard upper limit on the number of threads TBB uses. Setting max_threads + * via tbb::task_scheduler_init apparently only sets a soft limit on the number of threads. In addition, even if TBB + * uses only N threads at a time, it might be possible that a thread is destroyed and later in the program another + * thread with a different id replaces it, which will then get a number greater than or equal to N in our + * implementation (see ThreadManager::thread()). This occasionally leads to segfaults. + * We keep this implementation around as it is currently used by TimingData (see dune/xt/common/timings.hh) and the + * new implementation can't replace it in that context, as the new implementation based on + * tbb::enumerable_thread_specific lazily initializes the values in each thread. 
+ * \todo Either fix TimingData and remove this class or fix this class. + **/ +template <class ValueImp> +class UnsafePerThreadValue : public boost::noncopyable { public: typedef ValueImp ValueType; typedef typename std::conditional<std::is_const<ValueImp>::value, ValueImp, const ValueImp>::type ConstValueType; private: - typedef PerThreadValue<ValueImp> ThisType; + typedef UnsafePerThreadValue<ValueImp> ThisType; typedef std::deque<std::unique_ptr<ValueType>> ContainerType; public: //! Initialization by copy construction of ValueType - explicit PerThreadValue(ConstValueType& value) + explicit UnsafePerThreadValue(ConstValueType& value) : values_(threadManager().max_threads()) { - std::generate(values_.begin(), values_.end(), [=]() { return Common::make_unique<ValueType>(value); }); + std::generate(values_.begin(), values_.end(), [=]() { return std::make_unique<ValueType>(value); }); } //! Initialization by in-place construction ValueType with \param ctor_args template <class... InitTypes> - explicit PerThreadValue(InitTypes&&... ctor_args) + explicit UnsafePerThreadValue(InitTypes&&... 
ctor_args) : values_(threadManager().max_threads()) { for (auto&& val : values_) - val = Common::make_unique<ValueType>(ctor_args...); + val = std::make_unique<ValueType>(ctor_args...); } ThisType& operator=(ConstValueType&& value) { - std::generate(values_.begin(), values_.end(), [=]() { return Common::make_unique<ValueType>(value); }); + std::generate(values_.begin(), values_.end(), [=]() { return std::make_unique<ValueType>(value); }); return *this; } @@ -128,7 +371,8 @@ public: private: ContainerType values_; -}; +}; // class UnsafePerThreadValue<...> + template <class Imp, typename Result, class Reduction = std::plus<Result>> class ThreadResultPropagator @@ -167,6 +411,8 @@ private: Imp* imp_; std::list<Imp*> copies_; }; + + } // namespace Common } // namespace XT } // namespace Dune diff --git a/dune/xt/common/test/parallel.cc b/dune/xt/common/test/parallel.cc index 65dd8261ec4425c4b0eef061c626bd35de55e766..27c5bcc451275d46420877a6fc32f9765aa8d958 100644 --- a/dune/xt/common/test/parallel.cc +++ b/dune/xt/common/test/parallel.cc @@ -11,11 +11,11 @@ #include <dune/xt/common/test/main.hxx> -#include <string> -#include <memory> #include <array> -#include <initializer_list> +#include <thread> +#include <type_traits> #include <vector> + #include <dune/xt/common/parallel/threadmanager.hh> #include <dune/xt/common/parallel/threadstorage.hh> #include <dune/xt/common/parallel/helper.hh> @@ -80,7 +80,7 @@ TYPED_TEST(ThreadValueTest, All) typename PTVType::ValueType value(1); PTVType foo(value); check_eq(foo, value); - foo = typename PTVType::ValueType(1); + foo = PTVType(1); check_eq(foo, value); const auto new_value = *foo; const PTVType bar(*foo); @@ -89,12 +89,22 @@ TYPED_TEST(ThreadValueTest, All) { typename PTVType::ValueType zero(0); PTVType foo(zero); + size_t num_threads = Dune::XT::Common::threadManager().max_threads(); + std::vector<std::thread> threads(num_threads); + for (size_t ii = 0; ii < num_threads; ++ii) + threads[ii] = std::thread([&foo, &zero]() { 
EXPECT_EQ(*foo, zero); }); + for (size_t ii = 0; ii < num_threads; ++ii) + threads[ii].join(); auto sum = foo.accumulate(0, std::plus<typename PTVType::ValueType>()); - EXPECT_EQ(Dune::XT::Common::threadManager().max_threads() * zero, sum); + EXPECT_EQ(num_threads * zero, sum); typename PTVType::ValueType one = 1; PTVType bar(one); + for (size_t ii = 0; ii < num_threads; ++ii) + threads[ii] = std::thread([&bar, &one]() { EXPECT_EQ(*bar, one); }); + for (size_t ii = 0; ii < num_threads; ++ii) + threads[ii].join(); sum = bar.accumulate(0, std::plus<typename PTVType::ValueType>()); - EXPECT_EQ(Dune::XT::Common::threadManager().max_threads() * one, sum); + EXPECT_EQ(num_threads * one, sum); } } diff --git a/dune/xt/common/timings.cc b/dune/xt/common/timings.cc index b42f5b5635171ec9b676d67fb14840e6a247f818..ecde894b7a2e6e90337e059fabd8f3ff9c30fc9d 100644 --- a/dune/xt/common/timings.cc +++ b/dune/xt/common/timings.cc @@ -39,6 +39,7 @@ #include <dune/xt/common/filesystem.hh> #include <dune/xt/common/logging.hh> #include <dune/xt/common/parallel/threadmanager.hh> +#include <dune/xt/common/parallel/threadstorage.hh> #include <map> #include <string> @@ -97,7 +98,6 @@ void Timings::start(std::string section_name) if (section != known_timers_map_.end()) { if (section->second.first) // timer currently running return; - section->second.first = true; // set active, start with new section->second.second = TimingData(section_name); } else { diff --git a/dune/xt/common/timings.hh b/dune/xt/common/timings.hh index 2af4ad4ce3a930704cd75072516e78d6498589db..87220c07189303dcc2fd58a0c79847af8e2fdaf2 100644 --- a/dune/xt/common/timings.hh +++ b/dune/xt/common/timings.hh @@ -82,7 +82,7 @@ class Timings private: Timings(); - typedef std::map<std::string, std::pair<std::atomic<bool>, PerThreadValue<TimingData>>> KnownTimersMap; + typedef std::map<std::string, std::pair<std::atomic<bool>, UnsafePerThreadValue<TimingData>>> KnownTimersMap; //! 
section name -> seconds typedef std::map<std::string, TimingData::DeltaType> DeltaMap;