From cd750b7d962c0e54e132f48712367a7e95b537c1 Mon Sep 17 00:00:00 2001
From: Tobias Leibner <tobias.leibner@googlemail.com>
Date: Fri, 3 Apr 2020 12:38:33 +0200
Subject: [PATCH] [entropic-coords-mn] more parallelism

---
 .../reconstruction/linear_kinetic.hh          |  2 +-
 dune/gdt/spaces/basis/interface.hh            |  4 +-
 .../test/momentmodels/density_evaluations.hh  |  2 +-
 .../entropic-coords-mn-discretization.hh      |  9 ++-
 .../entropyflux_implementations.hh            | 71 +++++++++++--------
 .../momentmodels/entropyflux_kineticcoords.hh |  6 +-
 6 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/dune/gdt/operators/reconstruction/linear_kinetic.hh b/dune/gdt/operators/reconstruction/linear_kinetic.hh
index 5881e7a70..2221d6c09 100644
--- a/dune/gdt/operators/reconstruction/linear_kinetic.hh
+++ b/dune/gdt/operators/reconstruction/linear_kinetic.hh
@@ -153,7 +153,7 @@ public:
             range, grid_view, analytical_flux_, param);
     auto walker = XT::Grid::Walker<GV>(grid_view);
     walker.append(local_reconstruction_operator);
-    walker.walk(false);
+    walker.walk(true);
   } // void apply(...)
 
 private:
diff --git a/dune/gdt/spaces/basis/interface.hh b/dune/gdt/spaces/basis/interface.hh
index 95f9d1cd4..e401162b8 100644
--- a/dune/gdt/spaces/basis/interface.hh
+++ b/dune/gdt/spaces/basis/interface.hh
@@ -74,7 +74,7 @@ public:
     this->interpolate(element_function, order, dofs_);
     assert(dofs.size() >= sz);
     for (size_t ii = 0; ii < sz; ++ii)
-      dofs[ii] = dofs_[ii];
+      dofs.set_entry(ii, dofs_[ii]);
   } // ... interpolate(...)
 
   template <class V, class GV>
@@ -87,7 +87,7 @@ public:
     this->interpolate(element_function, dofs_);
     assert(dofs.size() >= sz);
     for (size_t ii = 0; ii < sz; ++ii)
-      dofs[ii] = dofs_[ii];
+      dofs.set_entry(ii, dofs_[ii]);
   } // ... interpolate(...)
 
   /// \}
diff --git a/dune/gdt/test/momentmodels/density_evaluations.hh b/dune/gdt/test/momentmodels/density_evaluations.hh
index 6691a04b9..308fee224 100644
--- a/dune/gdt/test/momentmodels/density_evaluations.hh
+++ b/dune/gdt/test/momentmodels/density_evaluations.hh
@@ -84,12 +84,12 @@ public:
   void apply_local(const EntityType& entity) override final
   {
     local_alpha_->bind(entity);
-    local_range_->bind(entity);
     const auto entity_index = index_set_.index(entity);
     const auto& local_alpha_dofs = local_alpha_->dofs();
     for (size_t ii = 0; ii < dimRange; ++ii)
       alpha_tmp_[ii] = local_alpha_dofs.get_entry(ii);
     analytical_flux_.store_evaluations(entity_index, alpha_tmp_, min_acceptable_density_);
+    local_range_->bind(entity);
     auto& local_range_dofs = local_range_->dofs();
     for (size_t ii = 0; ii < dimRange; ++ii)
       local_range_dofs.set_entry(ii, alpha_tmp_[ii]);
diff --git a/dune/gdt/test/momentmodels/entropic-coords-mn-discretization.hh b/dune/gdt/test/momentmodels/entropic-coords-mn-discretization.hh
index 903390c0c..f5b1a66b4 100644
--- a/dune/gdt/test/momentmodels/entropic-coords-mn-discretization.hh
+++ b/dune/gdt/test/momentmodels/entropic-coords-mn-discretization.hh
@@ -130,9 +130,8 @@ struct HyperbolicEntropicCoordsMnDiscretization
     // ***************** project initial values to discrete function *********************
     // create a discrete function for the solution
     DiscreteFunctionType u(fv_space, "u_initial");
-    // No mutexes needed because the only operator that would need synchronisation is the advection operator which will
-    // not be parallelised.
-    VectorType alpha_vec(fv_space.mapper().size(), 0., 0);
+    // The only operator that needs synchronisation is the advection operator
+    VectorType alpha_vec(fv_space.mapper().size(), 0., DXTC_CONFIG_GET("num_alpha_mutexes", 1));
     DiscreteFunctionType alpha(fv_space, alpha_vec, "alpha_initial");
     // project initial values
     default_interpolation(*initial_values_u, u, grid_view);
@@ -206,7 +205,7 @@ struct HyperbolicEntropicCoordsMnDiscretization
     // do not use parallelisation here, as the advection operator does almost no work (allows to use alpha_vec without
     // mutexes)
     AdvectionOperatorType advection_operator(
-        grid_view, numerical_flux, advection_source_space, fv_space, /*use_tbb*/ false);
+        grid_view, numerical_flux, advection_source_space, fv_space, /*use_tbb*/ true);
 
     // boundary treatment
     using BoundaryOperator =
@@ -299,7 +298,7 @@ struct HyperbolicEntropicCoordsMnDiscretization
       ret.axpy(Q_value, basis_integrated);
       auto& range_dofs = local_range.dofs();
       for (size_t ii = 0; ii < dimRange; ++ii)
-        range_dofs[ii] += ret[ii];
+        range_dofs.add_to_entry(ii, ret[ii]);
     };
     RhsOperatorType rhs_operator(grid_view, fv_space, fv_space, false, true);
     rhs_operator.append(GenericLocalElementOperator<VectorType, GV, dimRange>(rhs_func));
diff --git a/dune/gdt/test/momentmodels/entropyflux_implementations.hh b/dune/gdt/test/momentmodels/entropyflux_implementations.hh
index e80fd2545..59b577000 100644
--- a/dune/gdt/test/momentmodels/entropyflux_implementations.hh
+++ b/dune/gdt/test/momentmodels/entropyflux_implementations.hh
@@ -275,7 +275,7 @@ public:
   {
     auto& eta_ast_prime_vals = working_storage();
     evaluate_eta_ast_prime(beta_in, M, eta_ast_prime_vals);
-    return std::inner_product(
+    return XT::Common::transform_reduce(
         quad_weights_.begin(), quad_weights_.end(), eta_ast_prime_vals.begin(), RangeFieldType(0.));
   }
 
@@ -284,7 +284,8 @@ public:
   {
     auto& eta_ast_vals = working_storage();
     evaluate_eta_ast(beta_in, M, eta_ast_vals);
-    return std::inner_product(quad_weights_.begin(), quad_weights_.end(), eta_ast_vals.begin(), RangeFieldType(0.));
+    return XT::Common::transform_reduce(
+        quad_weights_.begin(), quad_weights_.end(), eta_ast_vals.begin(), RangeFieldType(0.));
   }
 
   // returns < b \eta_{\ast}^{\prime}(\alpha^T b(v)) >
@@ -740,7 +741,7 @@ public:
     const size_t num_quad_points = quad_points_.size();
     for (size_t ll = 0; ll < num_quad_points; ++ll) {
       const auto* basis_ll = &(M.get_entry_ref(ll, 0.));
-      scalar_products[ll] = std::inner_product(beta_in.begin(), beta_in.end(), basis_ll, 0.);
+      scalar_products[ll] = XT::Common::transform_reduce(beta_in.begin(), beta_in.end(), basis_ll, 0.);
     }
 #endif
   }
@@ -1741,7 +1742,7 @@ public:
     evaluate_eta_ast_prime(beta_in, M, eta_ast_prime_vals);
     RangeFieldType ret(0.);
     for (size_t jj = 0; jj < num_blocks; ++jj)
-      ret += std::inner_product(
+      ret += XT::Common::transform_reduce(
           quad_weights_[jj].begin(), quad_weights_[jj].end(), eta_ast_prime_vals[jj].begin(), RangeFieldType(0.));
     return ret;
   }
@@ -1753,7 +1754,7 @@ public:
     evaluate_eta_ast(beta_in, M, eta_ast_vals);
     RangeFieldType ret(0.);
     for (size_t jj = 0; jj < num_blocks; ++jj)
-      ret += std::inner_product(
+      ret += XT::Common::transform_reduce(
           quad_weights_[jj].begin(), quad_weights_[jj].end(), eta_ast_vals[jj].begin(), RangeFieldType(0.));
     return ret;
   }
@@ -2084,24 +2085,29 @@ public:
     evaluate_eta_ast_prime(alpha_i, M_, eta_ast_prime_vals);
     DomainType ret(0);
     for (size_t jj = 0; jj < num_blocks; ++jj) {
+      const auto& eta_ast_vals = eta_ast_prime_vals[jj];
+      const auto& weights = quad_weights_[jj];
+      const auto& points = quad_points_[jj];
+      const auto& basis = M_[jj];
+      const auto num_quad_points = weights.size();
       const auto offset = block_size * jj;
       if (quad_signs_[jj][dd] == 0) {
         // in this case, we have to decide which eta_ast_prime_vals to use for each quadrature point individually
-        for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) {
-          const auto position = quad_points_[jj][ll][dd];
+        for (size_t ll = 0; ll < num_quad_points; ++ll) {
+          const auto position = points[ll][dd];
           if (position * n_ij[dd] > 0.) {
-            const RangeFieldType factor = eta_ast_prime_vals[jj][ll] * quad_weights_[jj][ll] * position;
+            const RangeFieldType factor = eta_ast_vals[ll] * weights[ll] * position;
             for (size_t ii = 0; ii < block_size; ++ii)
-              ret[offset + ii] += M_[jj].get_entry(ll, ii) * factor;
+              ret[offset + ii] += basis.get_entry(ll, ii) * factor;
           }
         } // ll
       } else {
         // all quadrature points have the same sign
         if (quad_signs_[jj][dd] * n_ij[dd] > 0.) {
-          for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) {
-            const RangeFieldType factor = eta_ast_prime_vals[jj][ll] * quad_weights_[jj][ll] * quad_points_[jj][ll][dd];
+          for (size_t ll = 0; ll < num_quad_points; ++ll) {
+            const RangeFieldType factor = eta_ast_vals[ll] * weights[ll] * points[ll][dd];
             for (size_t ii = 0; ii < block_size; ++ii)
-              ret[offset + ii] += M_[jj].get_entry(ll, ii) * factor;
+              ret[offset + ii] += basis.get_entry(ll, ii) * factor;
           } // ll
         }
       } // quad_sign
@@ -2136,35 +2142,42 @@ public:
     RangeFieldType factor, slope;
     for (size_t jj = 0; jj < num_blocks; ++jj) {
       // calculate fluxes
+      const auto& weights = quad_weights_[jj];
+      const auto& points = quad_points_[jj];
+      const auto& basis = M_[jj];
+      const auto& psi_e = psi_entity[jj];
+      const auto& psi_l = psi_left[jj];
+      const auto& psi_r = psi_right[jj];
+      const auto num_quad_points = weights.size();
       const auto offset = block_size * jj;
       if (quad_signs_[jj][dd] == 0) {
         // in this case, we have to decide which flux_value to use for each quadrature point individually
-        for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) {
-          const auto position = quad_points_[jj][ll][dd];
+        for (size_t ll = 0; ll < num_quad_points; ++ll) {
+          const auto position = points[ll][dd];
           if constexpr (reconstruct) {
-            slope = slope_func(psi_entity[jj][ll] - psi_left[jj][ll], psi_right[jj][ll] - psi_entity[jj][ll]);
-            factor = position > 0 ? psi_entity[jj][ll] + 0.5 * slope : psi_entity[jj][ll] - 0.5 * slope;
-            factor *= quad_weights_[jj][ll] * position;
+            slope = slope_func(psi_e[ll] - psi_l[ll], psi_r[ll] - psi_e[ll]);
+            factor = position > 0 ? psi_e[ll] + 0.5 * slope : psi_e[ll] - 0.5 * slope;
+            factor *= weights[ll] * position;
           } else {
-            factor = psi_entity[jj][ll] * quad_weights_[jj][ll] * position;
+            factor = psi_e[ll] * weights[ll] * position;
           }
           auto& val = position > 0. ? right_flux_value : left_flux_value;
           for (size_t ii = 0; ii < block_size; ++ii)
-            val[offset + ii] += M_[jj].get_entry(ll, ii) * factor;
+            val[offset + ii] += basis.get_entry(ll, ii) * factor;
         } // ll
       } else {
         // all quadrature points have the same sign
         auto& flux_val = quad_signs_[jj][dd] > 0 ? right_flux_value : left_flux_value;
         const double sign_factor = quad_signs_[jj][dd] > 0 ? 0.5 : -0.5;
-        for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) {
+        for (size_t ll = 0; ll < num_quad_points; ++ll) {
           if constexpr (reconstruct) {
-            slope = slope_func(psi_entity[jj][ll] - psi_left[jj][ll], psi_right[jj][ll] - psi_entity[jj][ll]);
-            factor = (psi_entity[jj][ll] + sign_factor * slope) * quad_weights_[jj][ll] * quad_points_[jj][ll][dd];
+            slope = slope_func(psi_e[ll] - psi_l[ll], psi_r[ll] - psi_e[ll]);
+            factor = (psi_e[ll] + sign_factor * slope) * weights[ll] * points[ll][dd];
           } else {
-            factor = psi_entity[jj][ll] * quad_weights_[jj][ll] * quad_points_[jj][ll][dd];
+            factor = psi_e[ll] * weights[ll] * points[ll][dd];
           }
           for (size_t ii = 0; ii < block_size; ++ii)
-            flux_val[offset + ii] += M_[jj].get_entry(ll, ii) * factor;
+            flux_val[offset + ii] += basis.get_entry(ll, ii) * factor;
         } // ll
       } // quad_sign
     } // jj
@@ -2289,7 +2302,7 @@ public:
     const size_t num_quad_points = quad_points_[jj].size();
     for (size_t ll = 0; ll < num_quad_points; ++ll) {
       const auto* basis_ll = &(M.get_entry_ref(ll, 0.));
-      scalar_products[ll] = std::inner_product(beta_in.begin(), beta_in.end(), basis_ll, 0.);
+      scalar_products[ll] = XT::Common::transform_reduce(beta_in.begin(), beta_in.end(), basis_ll, 0.);
     } // ll
   }
 
@@ -2801,7 +2814,7 @@ public:
     evaluate_eta_ast_prime(alpha, eta_ast_prime_vals);
     RangeFieldType ret(0.);
     for (size_t jj = 0; jj < num_faces_; ++jj)
-      ret += std::inner_product(
+      ret += XT::Common::transform_reduce(
           quad_weights_[jj].begin(), quad_weights_[jj].end(), eta_ast_prime_vals[jj].begin(), RangeFieldType(0.));
     return ret;
   }
@@ -2819,7 +2832,7 @@ public:
     evaluate_eta_ast(alpha, eta_ast_vals);
     RangeFieldType ret(0.);
     for (size_t jj = 0; jj < num_faces_; ++jj)
-      ret += std::inner_product(
+      ret += XT::Common::transform_reduce(
           quad_weights_[jj].begin(), quad_weights_[jj].end(), eta_ast_vals[jj].begin(), RangeFieldType(0.));
     return ret;
   }
@@ -4983,7 +4996,7 @@ public:
     evaluate_eta_ast_prime(alpha, eta_ast_prime_vals);
     RangeFieldType ret(0.);
     for (size_t jj = 0; jj < num_intervals; ++jj)
-      ret += std::inner_product(
+      ret += XT::Common::transform_reduce(
           quad_weights_[jj].begin(), quad_weights_[jj].end(), eta_ast_prime_vals[jj].begin(), RangeFieldType(0.));
     return ret;
   }
@@ -4995,7 +5008,7 @@ public:
     evaluate_eta_ast(alpha, eta_ast_vals);
     RangeFieldType ret(0.);
     for (size_t jj = 0; jj < num_intervals; ++jj)
-      ret += std::inner_product(
+      ret += XT::Common::transform_reduce(
           quad_weights_[jj].begin(), quad_weights_[jj].end(), eta_ast_vals[jj].begin(), RangeFieldType(0.));
     return ret;
   }
diff --git a/dune/gdt/test/momentmodels/entropyflux_kineticcoords.hh b/dune/gdt/test/momentmodels/entropyflux_kineticcoords.hh
index 92ba56205..49ddb47a3 100644
--- a/dune/gdt/test/momentmodels/entropyflux_kineticcoords.hh
+++ b/dune/gdt/test/momentmodels/entropyflux_kineticcoords.hh
@@ -229,8 +229,8 @@ public:
                                                    eta_ast_twoprime_storage_[entity_index]);
     }
     // check for inf and nan and very low densities
-    u_[entity_index] = get_u(entity_index);
-    const auto& u = u_[entity_index];
+    auto& u = u_[entity_index];
+    u = get_u(entity_index);
     if (check) {
       const double* u_ptr = &(u[0]);
       const auto val = XT::Common::reduce(u_ptr, u_ptr + basis_dimRange, 0.);
@@ -246,7 +246,7 @@ public:
       //     // alpha, rho_min, basis_functions().needs_rho_for_min_density() ? basis_functions().density(u) : 0., u);
       // if (changed)
       //   store_evaluations(entity_index, alpha, rho_min, false);
-    }
+    } // if (check)
   }
 
   void set_eta_ast_pointers()
-- 
GitLab