From 46f49ead600e3a20b4c60ed197fca304dcd553f0 Mon Sep 17 00:00:00 2001
From: Tobias Leibner <tobias.leibner@googlemail.com>
Date: Wed, 27 Mar 2019 16:25:34 +0100
Subject: [PATCH] [momentmodels] start cleaning up

---
 dune/gdt/momentmodels/entropybased_flux.hh    | 6695 +++++++----------
 dune/gdt/momentmodels/entropysolver.hh        |   21 +-
 ...nts_fv.hh => advection-fv-entropybased.hh} |    0
 dune/gdt/operators/reconstruction/slopes.hh   |   98 +-
 dune/gdt/test/mn-discretization.hh            |   28 +-
 dune/gdt/test/momentmodels/kineticequation.hh |   20 +-
 .../momentmodels/kinetictransport/base.hh     |   12 +-
 .../kinetictransport/checkerboard.hh          |   20 +-
 .../kinetictransport/planesource.hh           |   20 +-
 .../kinetictransport/pointsource.hh           |   20 +-
 .../momentmodels/kinetictransport/shadow.hh   |   20 +-
 .../kinetictransport/sourcebeam.hh            |   32 +-
 .../kinetictransport/testcases.hh             |  206 +-
 dune/gdt/test/pn-discretization.hh            |   26 +-
 dune/gdt/{ => tools}/timestepper/enums.hh     |    0
 .../timestepper/explicit-rungekutta.hh        |    0
 .../timestepper/fractional-step.hh            |    0
 dune/gdt/{ => tools}/timestepper/interface.hh |    0
 .../matrix-exponential-kinetic-isotropic.hh   |   16 +-
 19 files changed, 3070 insertions(+), 4164 deletions(-)
 rename dune/gdt/operators/{entropybasedmoments_fv.hh => advection-fv-entropybased.hh} (100%)
 rename dune/gdt/{ => tools}/timestepper/enums.hh (100%)
 rename dune/gdt/{ => tools}/timestepper/explicit-rungekutta.hh (100%)
 rename dune/gdt/{ => tools}/timestepper/fractional-step.hh (100%)
 rename dune/gdt/{ => tools}/timestepper/interface.hh (100%)
 rename dune/gdt/{ => tools}/timestepper/matrix-exponential-kinetic-isotropic.hh (90%)

diff --git a/dune/gdt/momentmodels/entropybased_flux.hh b/dune/gdt/momentmodels/entropybased_flux.hh
index 53d29a9d4..cefe995ff 100644
--- a/dune/gdt/momentmodels/entropybased_flux.hh
+++ b/dune/gdt/momentmodels/entropybased_flux.hh
@@ -50,34 +50,28 @@ namespace Dune {
 namespace GDT {
 
 
-#if 1
-/** Analytical flux \mathbf{f}(\mathbf{u}) = < \mu \mathbf{m} G_{\hat{\alpha}(\mathbf{u})} >,
- * for the notation see
- * Alldredge, Hauck, O'Leary, Tits, "Adaptive change of basis in entropy-based moment closures for linear kinetic
- * equations"
- */
-template <class BasisfunctionImp>
-class EntropyBasedLocalFlux
-  : public XT::Functions::FunctionInterface<BasisfunctionImp::dimRange,
-                                            BasisfunctionImp::dimDomain,
-                                            BasisfunctionImp::dimRange,
-                                            typename BasisfunctionImp::R>
+template <class MomentBasisImp>
+class EntropyBasedFluxImplementationUnspecializedBase
+  : public XT::Functions::FunctionInterface<MomentBasisImp::dimRange,
+                                            MomentBasisImp::dimDomain,
+                                            MomentBasisImp::dimRange,
+                                            typename MomentBasisImp::R>
 {
-  using BaseType = typename XT::Functions::FunctionInterface<BasisfunctionImp::dimRange,
-                                                             BasisfunctionImp::dimDomain,
-                                                             BasisfunctionImp::dimRange,
-                                                             typename BasisfunctionImp::R>;
-  using ThisType = EntropyBasedLocalFlux;
+  using BaseType = typename XT::Functions::FunctionInterface<MomentBasisImp::dimRange,
+                                                             MomentBasisImp::dimDomain,
+                                                             MomentBasisImp::dimRange,
+                                                             typename MomentBasisImp::R>;
+  using ThisType = EntropyBasedFluxImplementationUnspecializedBase;
 
 public:
-  using BasisfunctionType = BasisfunctionImp;
+  using MomentBasis = MomentBasisImp;
   using BaseType::d;
   using BaseType::r;
-  static const size_t basis_dimDomain = BasisfunctionType::dimDomain;
-  static const size_t basis_dimRange = BasisfunctionType::dimRange;
+  static const size_t basis_dimDomain = MomentBasis::dimDomain;
+  static const size_t basis_dimRange = MomentBasis::dimRange;
   using typename BaseType::DerivativeRangeReturnType;
   using typename BaseType::DomainFieldType;
-  using BasisDomainType = typename BasisfunctionType::DomainType;
+  using BasisDomainType = typename MomentBasis::DomainType;
   using typename BaseType::DomainType;
   using typename BaseType::RangeFieldType;
   using typename BaseType::RangeReturnType;
@@ -91,71 +85,15 @@ public:
   using BasisValuesMatrixType = XT::LA::CommonDenseMatrix<RangeFieldType>;
   using AlphaReturnType = std::pair<VectorType, std::pair<DomainType, RangeFieldType>>;
 
-  // get permutation instead of sorting directly to be able to sort two vectors the same way
-  // see
-  // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of
-  template <typename T, typename Compare>
-  std::vector<std::size_t> get_sort_permutation(const std::vector<T>& vec, const Compare& compare)
-  {
-    std::vector<std::size_t> p(vec.size());
-    std::iota(p.begin(), p.end(), 0);
-    std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { return compare(vec[i], vec[j]); });
-    return p;
-  }
-
-  template <typename T>
-  void apply_permutation_in_place(std::vector<T>& vec, const std::vector<std::size_t>& p)
-  {
-    std::vector<bool> done(vec.size());
-    for (std::size_t i = 0; i < vec.size(); ++i) {
-      if (done[i]) {
-        continue;
-      }
-      done[i] = true;
-      std::size_t prev_j = i;
-      std::size_t j = p[i];
-      while (i != j) {
-        std::swap(vec[prev_j], vec[j]);
-        done[j] = true;
-        prev_j = j;
-        j = p[j];
-      }
-    }
-  }
-
-  // Joins duplicate quadpoints, vectors have to be sorted!
-  void join_duplicate_quadpoints(std::vector<BasisDomainType>& quad_points, std::vector<RangeFieldType>& quad_weights)
-  {
-    // Index of first quad_point of several quad_points with the same position
-    size_t curr_index = 0;
-    std::vector<size_t> indices_to_remove;
-    for (size_t ll = 1; ll < quad_weights.size(); ++ll) {
-      if (XT::Common::FloatCmp::eq(quad_points[curr_index], quad_points[ll])) {
-        quad_weights[curr_index] += quad_weights[ll];
-        indices_to_remove.push_back(ll);
-      } else {
-        curr_index = ll;
-      }
-    } // ll
-    assert(indices_to_remove.size() < std::numeric_limits<int>::max());
-    // remove duplicate points, from back to front to avoid invalidating indices
-    for (int ll = static_cast<int>(indices_to_remove.size()) - 1; ll >= 0; --ll) {
-      quad_points.erase(quad_points.begin() + indices_to_remove[ll]);
-      quad_weights.erase(quad_weights.begin() + indices_to_remove[ll]);
-    }
-  }
-
-  explicit EntropyBasedLocalFlux(
-      const BasisfunctionType& basis_functions,
-      const RangeFieldType tau = 1e-9,
-      const RangeFieldType epsilon_gamma = 0.01,
-      const RangeFieldType chi = 0.5,
-      const RangeFieldType xi = 1e-3,
-      const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
-      const size_t k_0 = 500,
-      const size_t k_max = 1000,
-      const RangeFieldType epsilon = std::pow(2, -52),
-      const std::string name = static_id())
+  explicit EntropyBasedFluxImplementationUnspecializedBase(const MomentBasis& basis_functions,
+                                                           const RangeFieldType tau,
+                                                           const RangeFieldType epsilon_gamma,
+                                                           const RangeFieldType chi,
+                                                           const RangeFieldType xi,
+                                                           const std::vector<RangeFieldType> r_sequence,
+                                                           const size_t k_0,
+                                                           const size_t k_max,
+                                                           const RangeFieldType epsilon)
     : basis_functions_(basis_functions)
     , quad_points_(XT::Data::merged_quadrature(basis_functions_.quadratures()).size())
     , quad_weights_(quad_points_.size())
@@ -168,17 +106,14 @@ public:
     , k_0_(k_0)
     , k_max_(k_max)
     , epsilon_(epsilon)
-    , T_minus_one_(std::make_unique<MatrixType>())
-    , name_(name)
     , realizability_helper_(basis_functions_,
                             quad_points_
-#  if HAVE_CLP
+#if HAVE_CLP
                             ,
                             lp_
-#  endif
+#endif
       )
   {
-    XT::LA::eye_matrix(*T_minus_one_);
     size_t ll = 0;
     for (const auto& quad_point : XT::Data::merged_quadrature(basis_functions_.quadratures())) {
       quad_points_[ll] = quad_point.position();
@@ -207,12 +142,7 @@ public:
     return 1;
   }
 
-  static std::string static_id()
-  {
-    return "gdt.entropybasedflux";
-  }
-
-  DomainType get_initial_alpha(const DomainType& u) const
+  VectorType get_isotropic_alpha(const DomainType& u) const
   {
     static const auto alpha_iso = basis_functions_.alpha_iso();
     static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
@@ -222,11 +152,11 @@ public:
   virtual RangeReturnType evaluate(const DomainType& u,
                                    const XT::Common::Parameter& /*param*/ = {}) const override final
   {
-    const auto alpha = get_alpha(u, get_initial_alpha(u), true)->first;
+    const auto alpha = get_alpha(u, get_isotropic_alpha(u), true)->first;
     return evaluate_with_alpha(alpha);
   }
 
-  virtual RangeReturnType evaluate_with_alpha(const DomainType& alpha) const
+  virtual RangeReturnType evaluate_with_alpha(const VectorType& alpha) const
   {
     RangeReturnType ret(0.);
     auto& work_vecs = working_storage();
@@ -246,11 +176,11 @@ public:
   virtual DerivativeRangeReturnType jacobian(const DomainType& u,
                                              const XT::Common::Parameter& /*param*/ = {}) const override final
   {
-    const auto alpha = get_alpha(u, get_initial_alpha(u), true)->first;
+    const auto alpha = get_alpha(u, get_isotropic_alpha(u), true)->first;
     return jacobian_with_alpha(alpha);
   }
 
-  virtual DerivativeRangeReturnType jacobian_with_alpha(const DomainType& alpha) const
+  virtual DerivativeRangeReturnType jacobian_with_alpha(const VectorType& alpha) const
   {
     DerivativeRangeReturnType ret;
     thread_local auto H = XT::Common::make_unique<MatrixType>();
@@ -269,13 +199,13 @@ public:
                                    const size_t dd) const
   {
     // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
-    const auto alpha_i = get_alpha(u_i, get_initial_alpha(u_i), true)->first;
-    const auto alpha_j = get_alpha(u_j, get_initial_alpha(u_j), true)->first;
+    const auto alpha_i = get_alpha(u_i, get_isotropic_alpha(u_i), true)->first;
+    const auto alpha_j = get_alpha(u_j, get_isotropic_alpha(u_j), true)->first;
     evaluate_kinetic_flux_with_alphas(alpha_i, alpha_j, n_ij, dd);
   } // DomainType evaluate_kinetic_flux(...)
 
-  DomainType evaluate_kinetic_flux_with_alphas(const DomainType& alpha_i,
-                                               const DomainType& alpha_j,
+  DomainType evaluate_kinetic_flux_with_alphas(const VectorType& alpha_i,
+                                               const VectorType& alpha_j,
                                                const BasisDomainType& n_ij,
                                                const size_t dd) const
 
@@ -298,149 +228,78 @@ public:
     return ret;
   } // DomainType evaluate_kinetic_flux_with_alphas(...)
 
-  const BasisfunctionType& basis_functions() const
+  const MomentBasis& basis_functions() const
   {
     return basis_functions_;
   }
 
   // returns (alpha, (actual_u, r)), where r is the regularization parameter and actual_u the regularized u
-  std::unique_ptr<AlphaReturnType>
-  get_alpha(const DomainType& u, const DomainType& alpha_in, const bool regularize) const
-  {
-    // get initial multiplier and basis matrix from last time step
-    auto ret = std::make_unique<AlphaReturnType>();
-
-    // rescale u such that the density <psi> is 1
-    RangeFieldType density = basis_functions_.density(u);
-    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
-    if (!(density > 0.) || std::isinf(density))
-      DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
-
-    VectorType u_prime = u / density;
-    VectorType alpha_initial = alpha_in - alpha_iso_prime * std::log(density);
-    VectorType beta_in = alpha_initial;
-    VectorType v, u_eps_diff, g_k, beta_out;
-    RangeFieldType first_error_cond, second_error_cond, tau_prime;
-
-    auto u_iso = basis_functions_.u_iso();
-    const RangeFieldType dim_factor = is_full_moment_basis<BasisfunctionType>::value ? 1. : std::sqrt(basis_dimDomain);
-    tau_prime = std::min(tau_ / ((1 + dim_factor * u_prime.two_norm()) * density + dim_factor * tau_), tau_);
+  virtual std::unique_ptr<AlphaReturnType>
+  get_alpha(const DomainType& u, const VectorType& alpha_in, const bool regularize) const = 0;
 
-    thread_local auto T_k = XT::Common::make_unique<MatrixType>();
+protected:
+  // get permutation instead of sorting directly to be able to sort two vectors the same way
+  // see
+  // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of
+  template <typename T, typename Compare>
+  static std::vector<std::size_t> get_sort_permutation(const std::vector<T>& vec, const Compare& compare)
+  {
+    std::vector<std::size_t> p(vec.size());
+    std::iota(p.begin(), p.end(), 0);
+    std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { return compare(vec[i], vec[j]); });
+    return p;
+  }
 
-    const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
-    const auto r_max = r_sequence.back();
-    for (const auto& r : r_sequence) {
-      // regularize u
-      v = u_prime;
-      if (r > 0) {
-        beta_in = alpha_initial;
-        DynamicRangeType r_times_u_iso = u_iso;
-        r_times_u_iso *= r;
-        v *= 1 - r;
-        v += r_times_u_iso;
+  template <typename T>
+  static void apply_permutation_in_place(std::vector<T>& vec, const std::vector<std::size_t>& p)
+  {
+    std::vector<bool> done(vec.size());
+    for (std::size_t i = 0; i < vec.size(); ++i) {
+      if (done[i]) {
+        continue;
       }
-      *T_k = *T_minus_one_;
-      // calculate T_k u
-      VectorType v_k = v;
-      // calculate values of basis p = S_k m
-      thread_local BasisValuesMatrixType P_k(M_.backend(), false, 0., 0);
-      std::copy_n(M_.data(), M_.rows() * M_.cols(), P_k.data());
-      // calculate f_0
-      RangeFieldType f_k = calculate_scalar_integral(beta_in, P_k);
-      f_k -= beta_in * v_k;
-
-      thread_local auto H = XT::Common::make_unique<MatrixType>(0.);
-
-      int pure_newton = 0;
-      for (size_t kk = 0; kk < k_max_; ++kk) {
-        // exit inner for loop to increase r if too many iterations are used or cholesky decomposition fails
-        if (kk > k_0_ && r < r_max)
-          break;
-        try {
-          change_basis(beta_in, v_k, P_k, *T_k, g_k, beta_out, *H);
-        } catch (const Dune::MathError&) {
-          if (r < r_max)
-            break;
-          const std::string err_msg =
-              "Failed to converge for " + XT::Common::to_string(u) + " with density " + XT::Common::to_string(density)
-              + " and multiplier " + XT::Common::to_string(beta_in)
-              + " due to errors in change_basis! Last u_eps_diff = " + XT::Common::to_string(u_eps_diff)
-              + ", first_error_cond = " + XT::Common::to_string(first_error_cond) + ", second_error_cond = "
-              + XT::Common::to_string(second_error_cond) + ", tau_prime = " + XT::Common::to_string(tau_prime);
-          DUNE_THROW(MathError, err_msg);
-        }
-        // calculate descent direction d_k;
-        VectorType d_k = g_k;
-        d_k *= -1;
-        // Calculate stopping criteria (in original basis). Variables with _k are in current basis, without k in
-        // original basis.
-        VectorType alpha_tilde;
-        XT::LA::solve_lower_triangular_transposed(*T_k, alpha_tilde, beta_out);
-        VectorType u_alpha_tilde;
-        calculate_vector_integral(alpha_tilde, M_, M_, u_alpha_tilde);
-        VectorType g_alpha_tilde = u_alpha_tilde - v;
-        auto density_tilde = basis_functions_.density(u_alpha_tilde);
-        if (!(density_tilde > 0.) || std::isinf(density_tilde))
-          break;
-        const auto alpha_prime = alpha_tilde - alpha_iso_prime * std::log(density_tilde);
-        VectorType u_alpha_prime;
-        calculate_vector_integral(alpha_prime, M_, M_, u_alpha_prime);
-        u_eps_diff = v - u_alpha_prime * (1 - epsilon_gamma_);
-        VectorType d_alpha_tilde;
-        XT::LA::solve_lower_triangular_transposed(*T_k, d_alpha_tilde, d_k);
-        first_error_cond = g_alpha_tilde.two_norm();
-        second_error_cond = std::exp(d_alpha_tilde.one_norm() + std::abs(std::log(density_tilde)));
-        if (first_error_cond < tau_prime && 1 - epsilon_gamma_ < second_error_cond
-            && realizability_helper_.is_realizable(u_eps_diff, kk == static_cast<size_t>(0.8 * k_0_))) {
-          ret->first = alpha_prime + alpha_iso_prime * std::log(density);
-          VectorType u_ret;
-          calculate_vector_integral(ret->first, M_, M_, u_ret);
-          ret->second = std::make_pair(v * density, r);
-          return ret;
-        } else {
-          RangeFieldType zeta_k = 1;
-          beta_in = beta_out;
-          // backtracking line search
-          // while (pure_newton >= 2 || zeta_k > epsilon_ * beta_out.two_norm() / d_k.two_norm() * 100.) {
-          while (pure_newton >= 2 || zeta_k > epsilon_ * beta_out.two_norm() / d_k.two_norm()) {
-            VectorType beta_new = d_k;
-            beta_new *= zeta_k;
-            beta_new += beta_out;
-            RangeFieldType f = calculate_scalar_integral(beta_new, P_k);
-            f -= beta_new * v_k;
-            if (pure_newton >= 2 || XT::Common::FloatCmp::le(f, f_k + xi_ * zeta_k * (g_k * d_k))) {
-              beta_in = beta_new;
-              f_k = f;
-              pure_newton = 0;
-              break;
-            }
-            zeta_k = chi_ * zeta_k;
-          } // backtracking linesearch while
-          if (zeta_k <= epsilon_ * beta_out.two_norm() / d_k.two_norm())
-            ++pure_newton;
-        } // else (stopping conditions)
-      } // k loop (Newton iterations)
-    } // r loop (Regularization parameter)
-    const std::string err_msg = "Failed to converge for " + XT::Common::to_string(u) + " with density "
-                                + XT::Common::to_string(density) + " and multiplier " + XT::Common::to_string(beta_in)
-                                + " due to too many iterations! Last u_eps_diff = " + XT::Common::to_string(u_eps_diff)
-                                + ", first_error_cond = " + XT::Common::to_string(first_error_cond)
-                                + ", second_error_cond = " + XT::Common::to_string(second_error_cond)
-                                + ", tau_prime = " + XT::Common::to_string(tau_prime);
-    DUNE_THROW(MathError, err_msg);
+      done[i] = true;
+      std::size_t prev_j = i;
+      std::size_t j = p[i];
+      while (i != j) {
+        std::swap(vec[prev_j], vec[j]);
+        done[j] = true;
+        prev_j = j;
+        j = p[j];
+      }
+    }
+  }
 
-    return ret;
+  // Merges adjacent duplicate quadpoints and sums up their weights, so both vectors have to be sorted!
+  static void join_duplicate_quadpoints(std::vector<BasisDomainType>& quad_points,
+                                        std::vector<RangeFieldType>& quad_weights)
+  {
+    // Index of first quad_point of several quad_points with the same position
+    size_t curr_index = 0;
+    std::vector<size_t> indices_to_remove;
+    for (size_t ll = 1; ll < quad_weights.size(); ++ll) {
+      if (XT::Common::FloatCmp::eq(quad_points[curr_index], quad_points[ll])) {
+        quad_weights[curr_index] += quad_weights[ll];
+        indices_to_remove.push_back(ll);
+      } else {
+        curr_index = ll;
+      }
+    } // ll
+    assert(indices_to_remove.size() < std::numeric_limits<int>::max());
+    // remove duplicate points, from back to front to avoid invalidating indices
+    for (int ll = static_cast<int>(indices_to_remove.size()) - 1; ll >= 0; --ll) {
+      quad_points.erase(quad_points.begin() + indices_to_remove[ll]);
+      quad_weights.erase(quad_weights.begin() + indices_to_remove[ll]);
+    }
   }
 
-private:
-#  if HAVE_CLP
-  template <class BasisFuncImp = BasisfunctionType, bool anything = true>
+#if HAVE_CLP
+  template <class BasisFuncImp = MomentBasis, bool anything = true>
   struct RealizabilityHelper
   {
-    static_assert(std::is_same<BasisFuncImp, BasisfunctionType>::value, "BasisFuncImp has to be BasisfunctionType!");
+    static_assert(std::is_same<BasisFuncImp, MomentBasis>::value, "BasisFuncImp has to be MomentBasis!");
 
-    RealizabilityHelper(const BasisfunctionType& basis_functions,
+    RealizabilityHelper(const MomentBasis& basis_functions,
                         const std::vector<BasisDomainType>& quad_points,
                         XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>>& lp)
       : basis_functions_(basis_functions)
@@ -507,16 +366,15 @@ private:
     }
 
   private:
-    const BasisfunctionType& basis_functions_;
+    const MomentBasis& basis_functions_;
     const std::vector<BasisDomainType>& quad_points_;
     XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>>& lp_;
   }; // struct RealizabilityHelper<...>
-#  else // HAVE_CLP
-  template <class BasisFuncImp = BasisfunctionType, bool anything = true>
+#else // HAVE_CLP
+  template <class BasisFuncImp = MomentBasis, bool anything = true>
   struct RealizabilityHelper
   {
-    RealizabilityHelper(const BasisfunctionType& /*basis_functions*/,
-                        const std::vector<BasisDomainType>& /*quad_points*/)
+    RealizabilityHelper(const MomentBasis& /*basis_functions*/, const std::vector<BasisDomainType>& /*quad_points*/)
     {
       DUNE_THROW(Dune::NotImplemented, "You are missing Clp!");
     }
@@ -527,7 +385,7 @@ private:
       return false;
     }
   }; // struct RealizabilityHelper<...>
-#  endif // HAVE_CLP
+#endif // HAVE_CLP
 
   // specialization for hatfunctions
   template <size_t dimRange_or_refinements, bool anything>
@@ -539,14 +397,14 @@ private:
                                                     basis_dimDomain>,
                              anything>
   {
-    RealizabilityHelper(const BasisfunctionType& /*basis_functions*/,
+    RealizabilityHelper(const MomentBasis& /*basis_functions*/,
                         const std::vector<BasisDomainType>& /*quad_points*/
-#  if HAVE_CLP
+#if HAVE_CLP
                         ,
                         XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>>& /*lp*/)
-#  else
+#else
     )
-#  endif
+#endif
     {}
 
     static bool is_realizable(const DomainType& u, const bool /*reinitialize*/)
@@ -570,7 +428,7 @@ private:
                                  const BasisValuesMatrixType& M,
                                  std::vector<RangeFieldType>& scalar_products) const
   {
-#  if HAVE_MKL || HAVE_CBLAS
+#if HAVE_MKL || HAVE_CBLAS
     XT::Common::Blas::dgemv(XT::Common::Blas::row_major(),
                             XT::Common::Blas::no_trans(),
                             static_cast<int>(quad_points_.size()),
@@ -583,14 +441,14 @@ private:
                             0.,
                             scalar_products.data(),
                             1);
-#  else
+#else
     const size_t num_quad_points = quad_points_.size();
     std::fill(scalar_products.begin(), scalar_products.end(), 0.);
     for (size_t ll = 0; ll < num_quad_points; ++ll) {
       const auto* basis_ll = M.get_ptr(ll);
       scalar_products[ll] = std::inner_product(beta_in.begin(), beta_in.end(), basis_ll, 0.);
     }
-#  endif
+#endif
   }
 
   void apply_exponential(std::vector<RangeFieldType>& values) const
@@ -642,7 +500,7 @@ private:
   // calculate (T_k^{-1} M^T)^T = M T_k^{-T}
   void apply_inverse_matrix(const MatrixType& T_k, BasisValuesMatrixType& M) const
   {
-#  if HAVE_MKL || HAVE_CBLAS
+#if HAVE_MKL || HAVE_CBLAS
     // Calculate the transpose here first as this is much faster than passing the matrix to dtrsm and using CblasTrans
     thread_local auto T_k_trans = std::make_unique<MatrixType>(0.);
     copy_transposed(T_k, *T_k_trans);
@@ -659,7 +517,7 @@ private:
                             basis_dimRange,
                             M.data(),
                             matrix_num_cols);
-#  else
+#else
     assert(quad_points_.size() == M.rows());
     VectorType tmp_vec, tmp_vec2;
     for (size_t ll = 0; ll < quad_points_.size(); ++ll) {
@@ -667,10 +525,9 @@ private:
       XT::LA::solve_lower_triangular(T_k, tmp_vec2, tmp_vec);
       std::copy_n(tmp_vec2.begin(), basis_dimRange, M.get_ptr(ll));
     }
-#  endif
+#endif
   }
 
-
   template <size_t domainDim = basis_dimDomain, class anything = void>
   struct helper
   {
@@ -723,13 +580,18 @@ private:
     } // ii
   } // void calculate_A_Binv(...)
 
-  void calculate_hessian(const VectorType& alpha, const BasisValuesMatrixType& M, MatrixType& H) const
+  void calculate_hessian(const VectorType& alpha,
+                         const BasisValuesMatrixType& M,
+                         MatrixType& H,
+                         const bool use_work_vec_data = false) const
   {
     std::fill(H.begin(), H.end(), 0.);
     auto& work_vec = working_storage();
-    calculate_scalar_products(alpha, M, work_vec);
-    apply_exponential(work_vec);
-    const size_t num_quad_points = quad_weights_.size();
+    if (!use_work_vec_data) {
+      calculate_scalar_products(alpha, M, work_vec);
+      apply_exponential(work_vec);
+    }
+    const size_t num_quad_points = quad_weights_.size();
     // matrix is symmetric, we only use lower triangular part
     for (size_t ll = 0; ll < num_quad_points; ++ll) {
       auto factor_ll = work_vec[ll] * quad_weights_[ll];
@@ -801,8 +663,7 @@ private:
     g_k -= v_k;
   } // void change_basis(...)
 
-private:
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
   std::vector<BasisDomainType> quad_points_;
   std::vector<RangeFieldType> quad_weights_;
   BasisValuesMatrixType M_;
@@ -814,271 +675,97 @@ private:
   const size_t k_0_;
   const size_t k_max_;
   const RangeFieldType epsilon_;
-  const std::unique_ptr<MatrixType> T_minus_one_;
-  const std::string name_;
   const RealizabilityHelper<> realizability_helper_;
-#  if HAVE_CLP
+#if HAVE_CLP
   mutable XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>> lp_;
-#  endif
-};
 #endif
+};
 
 
 #if 1
-/**
- * Specialization for DG basis
+/** Analytical flux \mathbf{f}(\mathbf{u}) = < \mu \mathbf{m} G_{\hat{\alpha}(\mathbf{u})} >,
+ * for the notation see
+ * Alldredge, Hauck, O'Leary, Tits, "Adaptive change of basis in entropy-based moment closures for linear kinetic
+ * equations"
  */
-template <class D, size_t d, class R, size_t dimRange_or_refinements>
-class EntropyBasedLocalFlux<PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>>
-  : public XT::Functions::FunctionInterface<PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>::dimRange,
-                                            d,
-                                            PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>::dimRange,
-                                            R>
+template <class MomentBasisImp>
+class EntropyBasedFluxImplementation : public EntropyBasedFluxImplementationUnspecializedBase<MomentBasisImp>
 {
+  using BaseType = EntropyBasedFluxImplementationUnspecializedBase<MomentBasisImp>;
+  using ThisType = EntropyBasedFluxImplementation;
+
 public:
-  using BasisfunctionType = PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>;
-  using BaseType = typename XT::Functions::
-      FunctionInterface<BasisfunctionType::dimRange, BasisfunctionType::dimDomain, BasisfunctionType::dimRange, R>;
-  using ThisType = EntropyBasedLocalFlux;
-  using BaseType::d;
-  using BaseType::r;
-  static const size_t basis_dimDomain = BasisfunctionType::dimDomain;
-  static const size_t basis_dimRange = BasisfunctionType::dimRange;
-  using typename BaseType::DerivativeRangeReturnType;
-  using typename BaseType::DomainFieldType;
+  using BaseType::basis_dimDomain;
+  using typename BaseType::AlphaReturnType;
+  using typename BaseType::BasisValuesMatrixType;
   using typename BaseType::DomainType;
+  using typename BaseType::MatrixType;
+  using typename BaseType::MomentBasis;
   using typename BaseType::RangeFieldType;
-  using typename BaseType::RangeReturnType;
-  using typename BaseType::RowDerivativeRangeReturnType;
-  using BasisDomainType = typename BasisfunctionType::DomainType;
-  static const size_t block_size = (basis_dimDomain == 1) ? 2 : 4;
-  static const size_t num_blocks = basis_dimRange / block_size;
-  using BlockMatrixType = XT::Common::BlockedFieldMatrix<RangeFieldType, num_blocks, block_size>;
-  using LocalMatrixType = typename BlockMatrixType::BlockType;
-  using BlockVectorType = XT::Common::BlockedFieldVector<RangeFieldType, num_blocks, block_size>;
-  using VectorType = BlockVectorType;
-  using LocalVectorType = typename BlockVectorType::BlockType;
-  using BasisValuesMatrixType = FieldVector<XT::LA::CommonDenseMatrix<RangeFieldType>, num_blocks>;
-  using QuadraturePointsType =
-      FieldVector<std::vector<BasisDomainType, boost::alignment::aligned_allocator<BasisDomainType, 64>>, num_blocks>;
-  using QuadratureWeightsType =
-      FieldVector<std::vector<RangeFieldType, boost::alignment::aligned_allocator<RangeFieldType, 64>>, num_blocks>;
-  using TemporaryVectorType = std::vector<RangeFieldType, boost::alignment::aligned_allocator<RangeFieldType, 64>>;
-  using TemporaryVectorsType = FieldVector<TemporaryVectorType, num_blocks>;
-  using AlphaReturnType = std::pair<BlockVectorType, std::pair<DomainType, RangeFieldType>>;
-  static const size_t cache_size = 4 * basis_dimDomain + 2;
-
-  explicit EntropyBasedLocalFlux(
-      const BasisfunctionType& basis_functions,
-      const RangeFieldType tau = 1e-9,
-      const RangeFieldType epsilon_gamma = 0.01,
-      const RangeFieldType chi = 0.5,
-      const RangeFieldType xi = 1e-3,
-      const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
-      const size_t k_0 = 500,
-      const size_t k_max = 1000,
-      const RangeFieldType epsilon = std::pow(2, -52),
-      const std::string name = static_id())
-    : basis_functions_(basis_functions)
-    , M_(XT::LA::CommonDenseMatrix<RangeFieldType>())
-    , tau_(tau)
-    , epsilon_gamma_(epsilon_gamma)
-    , chi_(chi)
-    , xi_(xi)
-    , r_sequence_(r_sequence)
-    , k_0_(k_0)
-    , k_max_(k_max)
-    , epsilon_(epsilon)
-    , name_(name)
-  {
-    XT::LA::eye_matrix(T_minus_one_);
-    helper<basis_dimDomain>::calculate_plane_coefficients(basis_functions_);
-    const auto& quadratures = basis_functions_.quadratures();
-    assert(quadratures.size() == num_blocks);
-    for (size_t jj = 0; jj < num_blocks; ++jj) {
-      for (const auto& quad_point : quadratures[jj]) {
-        quad_points_[jj].emplace_back(quad_point.position());
-        quad_weights_[jj].emplace_back(quad_point.weight());
-      }
-    } // jj
-    for (size_t jj = 0; jj < num_blocks; ++jj) {
-      while (quad_weights_[jj].size() % 8) { // align to 64 byte boundary
-        quad_points_[jj].push_back(quad_points_[jj].back());
-        quad_weights_[jj].push_back(0.);
-      }
-      M_[jj] = XT::LA::CommonDenseMatrix<RangeFieldType>(quad_points_[jj].size(), block_size, 0., 0);
-      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) {
-        const auto val = basis_functions_.evaluate(quad_points_[jj][ll], jj);
-        for (size_t ii = 0; ii < block_size; ++ii)
-          M_[jj].set_entry(ll, ii, val[block_size * jj + ii]);
-      } // ll
-    } // jj
-  }
-
-  virtual int order(const XT::Common::Parameter& /*param*/) const override
-  {
-    return 1;
-  }
-
-  std::unique_ptr<BlockVectorType> get_initial_alpha(const DomainType& u) const
-  {
-    static const auto alpha_iso = basis_functions_.alpha_iso();
-    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
-    return std::make_unique<BlockVectorType>(alpha_iso + alpha_iso_prime * std::log(basis_functions_.density(u)));
-  }
-
-  virtual RangeReturnType evaluate(const DomainType& u,
-                                   const XT::Common::Parameter& /*param*/ = {}) const override final
-  {
-    const auto alpha = std::make_unique<BlockVectorType>(get_alpha(u, *get_initial_alpha(u), true)->first);
-    return evaluate_with_alpha(*alpha);
-  }
-
-  virtual RangeReturnType evaluate_with_alpha(const BlockVectorType& alpha) const
-  {
-    RangeReturnType ret(0.);
-    auto& work_vecs = working_storage();
-    calculate_scalar_products(alpha, M_, work_vecs);
-    apply_exponential(work_vecs);
-    for (size_t dd = 0; dd < basis_dimDomain; ++dd) {
-      // calculate ret[dd] = < omega[dd] m G_\alpha(u) >
-      for (size_t jj = 0; jj < num_blocks; ++jj) {
-        const auto offset = block_size * jj;
-        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-          const auto factor = work_vecs[jj][ll] * quad_weights_[jj][ll] * quad_points_[jj][ll][dd];
-          for (size_t ii = 0; ii < block_size; ++ii)
-            ret[dd][offset + ii] += M_[jj].get_entry(ll, ii) * factor;
-        } // ll
-      } // jj
-    } // dd
-    return ret;
-  } // void evaluate(...)
-
-  virtual DerivativeRangeReturnType jacobian(const DomainType& u,
-                                             const XT::Common::Parameter& /*param*/ = {}) const override final
-  {
-    const auto alpha = std::make_unique<BlockVectorType>(get_alpha(u, *get_initial_alpha(u), true)->first);
-    return jacobian_with_alpha(*alpha);
-  }
-
-  virtual DerivativeRangeReturnType jacobian_with_alpha(const BlockVectorType& alpha) const
-  {
-    DerivativeRangeReturnType ret;
-    thread_local auto H = XT::Common::make_unique<BlockMatrixType>();
-    calculate_hessian(alpha, M_, *H);
-    helper<basis_dimDomain>::jacobian(M_, *H, ret, this);
-    return ret;
-  }
-
-  static std::string static_id()
-  {
-    return "gdt.entropybasedflux";
-  }
-
-  // calculate \sum_{i=1}^d < v_i m \psi > n_i, where n is the unit outer normal,
-  // m is the basis function vector, phi_u is the ansatz corresponding to u
-  // and x, v, t are the space, velocity and time variable, respectively
-  // As we are using cartesian grids, n_i == 0 in all but one dimension, so only evaluate for i == dd
-  DomainType evaluate_kinetic_flux(const DomainType& u_i,
-                                   const DomainType& u_j,
-                                   const BasisDomainType& n_ij,
-                                   const size_t dd) const
-  {
-    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
-    const auto alpha_i = std::make_unique<BlockVectorType>(get_alpha(u_i, *get_initial_alpha(u_i), true)->first);
-    const auto alpha_j = std::make_unique<BlockVectorType>(get_alpha(u_j, *get_initial_alpha(u_j), true)->first);
-    evaluate_kinetic_flux_with_alphas(*alpha_i, *alpha_j, n_ij, dd);
-  } // DomainType evaluate_kinetic_flux(...)
-
-  DomainType evaluate_kinetic_flux_with_alphas(const BlockVectorType& alpha_i,
-                                               const BlockVectorType& alpha_j,
-                                               const BasisDomainType& n_ij,
-                                               const size_t dd) const
-  {
-    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
-    thread_local FieldVector<TemporaryVectorsType, 2> work_vecs;
-    for (size_t jj = 0; jj < num_blocks; ++jj) {
-      work_vecs[0][jj].resize(quad_points_[jj].size());
-      work_vecs[1][jj].resize(quad_points_[jj].size());
-    }
-    calculate_scalar_products(alpha_i, M_, work_vecs[0]);
-    calculate_scalar_products(alpha_j, M_, work_vecs[1]);
-    DomainType ret(0);
-    for (size_t jj = 0; jj < num_blocks; ++jj) {
-      const auto offset = block_size * jj;
-      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) {
-        const auto position = quad_points_[jj][ll][dd];
-        RangeFieldType factor =
-            position * n_ij[dd] > 0. ? std::exp(work_vecs[0][jj][ll]) : std::exp(work_vecs[1][jj][ll]);
-        factor *= quad_weights_[jj][ll] * position;
-        for (size_t ii = 0; ii < block_size; ++ii)
-          ret[offset + ii] += M_[jj].get_entry(ll, ii) * factor;
-      } // ll
-    } // jj
-    ret *= n_ij[dd];
-    return ret;
-  } // DomainType evaluate_kinetic_flux(...)
-
-  const BasisfunctionType& basis_functions() const
+  using typename BaseType::VectorType;
+
+  explicit EntropyBasedFluxImplementation(const MomentBasis& basis_functions,
+                                          const RangeFieldType tau,
+                                          const RangeFieldType epsilon_gamma,
+                                          const RangeFieldType chi,
+                                          const RangeFieldType xi,
+                                          const std::vector<RangeFieldType> r_sequence,
+                                          const size_t k_0,
+                                          const size_t k_max,
+                                          const RangeFieldType epsilon)
+    : BaseType(basis_functions, tau, epsilon_gamma, chi, xi, r_sequence, k_0, k_max, epsilon)
+    , T_minus_one_(std::make_unique<MatrixType>())
   {
-    return basis_functions_;
+    XT::LA::eye_matrix(*T_minus_one_);
   }
 
-  std::unique_ptr<AlphaReturnType>
-  get_alpha(const DomainType& u, const DomainType& alpha_in, const bool regularize) const
+  // returns (alpha, (actual_u, r)), where r is the regularization parameter and actual_u the regularized u
+  virtual std::unique_ptr<AlphaReturnType>
+  get_alpha(const DomainType& u, const VectorType& alpha_in, const bool regularize) const override final
   {
     auto ret = std::make_unique<AlphaReturnType>();
 
     // rescale u such that the density <psi> is 1
     RangeFieldType density = basis_functions_.density(u);
-    static const auto alpha_iso_prime = std::make_unique<BlockVectorType>(basis_functions_.alpha_iso_prime());
-    auto alpha_initial = std::make_unique<BlockVectorType>(*alpha_iso_prime);
-    *alpha_initial *= -std::log(density);
-    *alpha_initial += alpha_in;
-    if (!(density > 0. || !(basis_functions_.min_density(u) > 0.)) || std::isinf(density))
+    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
+    if (!(density > 0.) || std::isinf(density))
       DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
-    auto u_prime = std::make_unique<const BlockVectorType>(u / density);
 
-    // if value has already been calculated for these values, skip computation
-    RangeFieldType tau_prime = std::min(
-        tau_ / ((1 + std::sqrt(basis_dimRange) * u_prime->two_norm()) * density + std::sqrt(basis_dimRange) * tau_),
-        tau_);
+    VectorType u_prime = u / density;
+    VectorType alpha_initial = alpha_in - alpha_iso_prime * std::log(density);
+    VectorType beta_in = alpha_initial; // current iterate, starts at alpha_initial
+    VectorType v, u_eps_diff, g_k, beta_out;
+    RangeFieldType first_error_cond = 0., second_error_cond = 0., tau_prime; // 0 = not computed yet (printed on failure)
 
-    // calculate moment vector for isotropic distribution
-    auto u_iso = std::make_unique<const BlockVectorType>(basis_functions_.u_iso());
+    auto u_iso = basis_functions_.u_iso();
+    const RangeFieldType dim_factor = is_full_moment_basis<MomentBasis>::value ? 1. : std::sqrt(basis_dimDomain);
+    tau_prime = std::min(tau_ / ((1 + dim_factor * u_prime.two_norm()) * density + dim_factor * tau_), tau_);
 
-    // define further variables
-    auto g_k = std::make_unique<BlockVectorType>();
-    auto beta_out = std::make_unique<BlockVectorType>();
-    auto v = std::make_unique<BlockVectorType>();
-    thread_local auto T_k = XT::Common::make_unique<BlockMatrixType>();
-    auto beta_in = std::make_unique<BlockVectorType>(*alpha_initial);
+    thread_local auto T_k = XT::Common::make_unique<MatrixType>();
 
     const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
     const auto r_max = r_sequence.back();
     for (const auto& r : r_sequence) {
       // regularize u
-      *v = *u_prime;
-      if (r > 0.) {
-        *beta_in = *alpha_initial;
-        // calculate v = (1-r) u + r u_iso
-        // use beta_out as storage for u_iso_in * r
-        *v *= (1 - r);
-        *beta_out = *u_iso;
-        *beta_out *= r;
-        *v += *beta_out;
+      v = u_prime;
+      if (r > 0) {
+        beta_in = get_isotropic_alpha(u);
+        VectorType r_times_u_iso = u_iso;
+        r_times_u_iso *= r;
+        v *= 1 - r;
+        v += r_times_u_iso;
       }
-      for (size_t jj = 0; jj < num_blocks; ++jj)
-        T_k->block(jj) = T_minus_one_;
+      *T_k = *T_minus_one_;
       // calculate T_k u
-      auto v_k = std::make_unique<BlockVectorType>(*v);
+      VectorType v_k = v;
       // calculate values of basis p = S_k m
-      thread_local BasisValuesMatrixType P_k(XT::LA::CommonDenseMatrix<RangeFieldType>(0, 0, 0., 0));
-      copy_basis_matrix(M_, P_k);
+      thread_local BasisValuesMatrixType P_k(M_.backend(), false, 0., 0);
+      std::copy_n(M_.data(), M_.rows() * M_.cols(), P_k.data());
       // calculate f_0
-      RangeFieldType f_k = calculate_scalar_integral(*beta_in, P_k) - *beta_in * *v_k;
+      RangeFieldType f_k = calculate_scalar_integral(beta_in, P_k);
+      f_k -= beta_in * v_k;
 
-      thread_local auto H = XT::Common::make_unique<BlockMatrixType>(0.);
+      thread_local auto H = XT::Common::make_unique<MatrixType>(0.);
 
       int pure_newton = 0;
       for (size_t kk = 0; kk < k_max_; ++kk) {
@@ -1086,1390 +773,1055 @@ public:
         if (kk > k_0_ && r < r_max)
           break;
         try {
-          change_basis(*beta_in, *v_k, P_k, *T_k, *g_k, *beta_out, *H);
+          change_basis(beta_in, v_k, P_k, *T_k, g_k, beta_out, *H);
         } catch (const Dune::MathError&) {
           if (r < r_max)
             break;
-          DUNE_THROW(Dune::MathError, "Failure to converge!");
+          const std::string err_msg =
+              "Failed to converge for " + XT::Common::to_string(u) + " with density " + XT::Common::to_string(density)
+              + " and multiplier " + XT::Common::to_string(beta_in)
+              + " due to errors in change_basis! Last u_eps_diff = " + XT::Common::to_string(u_eps_diff)
+              + ", first_error_cond = " + XT::Common::to_string(first_error_cond) + ", second_error_cond = "
+              + XT::Common::to_string(second_error_cond) + ", tau_prime = " + XT::Common::to_string(tau_prime);
+          DUNE_THROW(MathError, err_msg);
         }
-
         // calculate descent direction d_k;
-        thread_local auto d_k = std::make_unique<BlockVectorType>();
-        *d_k = *g_k;
-        *d_k *= -1;
-
+        VectorType d_k = g_k;
+        d_k *= -1;
         // Calculate stopping criteria (in original basis). Variables with _k are in current basis, without k in
         // original basis.
-        thread_local auto alpha_tilde = std::make_unique<BlockVectorType>();
-        thread_local auto alpha_prime = std::make_unique<BlockVectorType>();
-        thread_local auto u_alpha_tilde = std::make_unique<BlockVectorType>();
-        thread_local auto u_alpha_prime = std::make_unique<BlockVectorType>();
-        thread_local auto d_alpha_tilde = std::make_unique<BlockVectorType>();
-        thread_local auto g_alpha_tilde = std::make_unique<BlockVectorType>();
-        thread_local auto u_eps_diff = std::make_unique<BlockVectorType>();
-        // convert everything to original basis
-        for (size_t jj = 0; jj < num_blocks; ++jj) {
-          XT::LA::solve_lower_triangular_transposed(T_k->block(jj), alpha_tilde->block(jj), beta_out->block(jj));
-          XT::LA::solve_lower_triangular_transposed(T_k->block(jj), d_alpha_tilde->block(jj), d_k->block(jj));
-        } // jj
-        calculate_vector_integral(*alpha_tilde, M_, M_, *u_alpha_tilde);
-        *g_alpha_tilde = *u_alpha_tilde;
-        *g_alpha_tilde -= *v;
-        auto density_tilde = basis_functions_.density(*u_alpha_tilde);
-        if (!(density_tilde > 0.) || !(basis_functions_.min_density(*u_alpha_tilde) > 0.) || std::isinf(density_tilde))
+        VectorType alpha_tilde;
+        XT::LA::solve_lower_triangular_transposed(*T_k, alpha_tilde, beta_out);
+        VectorType u_alpha_tilde;
+        calculate_vector_integral(alpha_tilde, M_, M_, u_alpha_tilde);
+        VectorType g_alpha_tilde = u_alpha_tilde - v;
+        auto density_tilde = basis_functions_.density(u_alpha_tilde);
+        if (!(density_tilde > 0.) || std::isinf(density_tilde))
           break;
-        *alpha_prime = *alpha_iso_prime;
-        *alpha_prime *= -std::log(density_tilde);
-        *alpha_prime += *alpha_tilde;
-        calculate_vector_integral(*alpha_prime, M_, M_, *u_alpha_prime);
-        *u_eps_diff = *u_alpha_prime;
-        *u_eps_diff *= -(1 - epsilon_gamma_);
-        *u_eps_diff += *v;
-        if (g_alpha_tilde->two_norm() < tau_prime
-            && 1 - epsilon_gamma_ < std::exp(d_alpha_tilde->one_norm() + std::abs(std::log(density_tilde)))
-            && helper<basis_dimDomain>::is_realizable(*u_eps_diff, basis_functions_)) {
-          ret->first = *alpha_iso_prime;
-          ret->first *= std::log(density);
-          ret->first += *alpha_prime;
-          ret->second.first = *v;
-          ret->second.first *= density;
-          ret->second.second = r;
+        const auto alpha_prime = alpha_tilde - alpha_iso_prime * std::log(density_tilde);
+        VectorType u_alpha_prime;
+        calculate_vector_integral(alpha_prime, M_, M_, u_alpha_prime);
+        u_eps_diff = v - u_alpha_prime * (1 - epsilon_gamma_);
+        VectorType d_alpha_tilde;
+        XT::LA::solve_lower_triangular_transposed(*T_k, d_alpha_tilde, d_k);
+        first_error_cond = g_alpha_tilde.two_norm();
+        second_error_cond = std::exp(d_alpha_tilde.one_norm() + std::abs(std::log(density_tilde)));
+        if (first_error_cond < tau_prime && 1 - epsilon_gamma_ < second_error_cond
+            && realizability_helper_.is_realizable(u_eps_diff, kk == static_cast<size_t>(0.8 * k_0_))) {
+          ret->first = alpha_prime + alpha_iso_prime * std::log(density);
+          ret->second = std::make_pair(v * density, r);
           return ret;
         } else {
           RangeFieldType zeta_k = 1;
-          *beta_in = *beta_out;
+          beta_in = beta_out;
           // backtracking line search
-          while (pure_newton >= 2 || zeta_k > epsilon_ * beta_out->two_norm() / d_k->two_norm()) {
-            thread_local auto beta_new = std::make_unique<BlockVectorType>();
-            *beta_new = *d_k;
-            *beta_new *= zeta_k;
-            *beta_new += *beta_out;
-            RangeFieldType f = calculate_scalar_integral(*beta_new, P_k) - *beta_new * *v_k;
-            if (pure_newton >= 2 || f <= f_k + xi_ * zeta_k * (*g_k * *d_k)) {
-              *beta_in = *beta_new;
+          // while (pure_newton >= 2 || zeta_k > epsilon_ * beta_out.two_norm() / d_k.two_norm() * 100.) {
+          while (pure_newton >= 2 || zeta_k > epsilon_ * beta_out.two_norm() / d_k.two_norm()) {
+            VectorType beta_new = d_k;
+            beta_new *= zeta_k;
+            beta_new += beta_out;
+            RangeFieldType f = calculate_scalar_integral(beta_new, P_k);
+            f -= beta_new * v_k;
+            if (pure_newton >= 2 || XT::Common::FloatCmp::le(f, f_k + xi_ * zeta_k * (g_k * d_k))) {
+              beta_in = beta_new;
               f_k = f;
               pure_newton = 0;
               break;
             }
             zeta_k = chi_ * zeta_k;
           } // backtracking linesearch while
-          if (zeta_k <= epsilon_ * beta_out->two_norm() / d_k->two_norm())
+          if (zeta_k <= epsilon_ * beta_out.two_norm() / d_k.two_norm())
             ++pure_newton;
         } // else (stopping conditions)
       } // k loop (Newton iterations)
     } // r loop (Regularization parameter)
-    DUNE_THROW(MathError, "Failed to converge");
+    const std::string err_msg = "Failed to converge for " + XT::Common::to_string(u) + " with density "
+                                + XT::Common::to_string(density) + " and multiplier " + XT::Common::to_string(beta_in)
+                                + " due to too many iterations! Last u_eps_diff = " + XT::Common::to_string(u_eps_diff)
+                                + ", first_error_cond = " + XT::Common::to_string(first_error_cond)
+                                + ", second_error_cond = " + XT::Common::to_string(second_error_cond)
+                                + ", tau_prime = " + XT::Common::to_string(tau_prime);
+    DUNE_THROW(MathError, err_msg);
 
     return ret;
   }
 
 private:
-  // temporary vectors to store inner products and exponentials
-  TemporaryVectorsType& working_storage() const
-  {
-    thread_local TemporaryVectorsType work_vecs;
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      work_vecs[jj].resize(quad_points_[jj].size());
-    return work_vecs;
-  }
+  using BaseType::apply_inverse_matrix;
+  using BaseType::calculate_hessian;
+  using BaseType::calculate_scalar_integral;
+  using BaseType::calculate_vector_integral;
+  using BaseType::get_isotropic_alpha;
 
-  void copy_basis_matrix(const BasisValuesMatrixType& source_mat, BasisValuesMatrixType& range_mat) const
+  void change_basis(const VectorType& beta_in, // in: multiplier at which the Hessian is evaluated
+                    VectorType& v_k, // in/out: replaced by the solve_lower_triangular result for L
+                    BasisValuesMatrixType& P_k, // in/out: basis values, updated via apply_inverse_matrix(L, .)
+                    MatrixType& T_k, // in/out: old value is copied, then rightmultiply(T_k, copy, L)
+                    VectorType& g_k, // out: vector integral for (beta_out, P_k) minus the updated v_k
+                    VectorType& beta_out, // out: filled by L.mtv(beta_in, .)
+                    MatrixType& H) const // scratch: receives the Hessian and is then read as L
   {
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      range_mat[jj].backend() = source_mat[jj].backend();
-  }
+    calculate_hessian(beta_in, P_k, H);
+    XT::LA::cholesky(H); // presumably leaves the Cholesky factor in H (it is read as L below) - confirm
+    const auto& L = H;
+    thread_local std::unique_ptr<MatrixType> tmp_mat = std::make_unique<MatrixType>(); // initialized once per thread
+    *tmp_mat = T_k;
+    rightmultiply(T_k, *tmp_mat, L); // presumably T_k = T_k_old * L - confirm against rightmultiply
+    L.mtv(beta_in, beta_out); // presumably beta_out = L^T beta_in (Dune mtv convention) - confirm
+    VectorType tmp_vec;
+    XT::LA::solve_lower_triangular(L, tmp_vec, v_k); // result goes to tmp_vec, then copied back into v_k
+    v_k = tmp_vec;
+    apply_inverse_matrix(L, P_k);
+    calculate_vector_integral(beta_out, P_k, P_k, g_k, false);
+    g_k -= v_k; // g_k = vector integral minus the updated v_k
+  } // void change_basis(...)
 
-  void calculate_scalar_products_block(const size_t jj,
-                                       const LocalVectorType& beta_in,
-                                       const XT::LA::CommonDenseMatrix<RangeFieldType>& M,
-                                       TemporaryVectorType& scalar_products) const
-  {
-    const size_t num_quad_points = quad_points_[jj].size();
-    for (size_t ll = 0; ll < num_quad_points; ++ll) {
-      const auto* basis_ll = M.get_ptr(ll);
-      scalar_products[ll] = std::inner_product(beta_in.begin(), beta_in.end(), basis_ll, 0.);
-    } // ll
-  }
+  using BaseType::basis_functions_;
+  using BaseType::chi_;
+  using BaseType::epsilon_;
+  using BaseType::epsilon_gamma_;
+  using BaseType::k_0_;
+  using BaseType::k_max_;
+  using BaseType::M_;
+  using BaseType::quad_points_;
+  using BaseType::quad_weights_;
+  using BaseType::r_sequence_;
+  using BaseType::realizability_helper_;
+  using BaseType::tau_;
+  using BaseType::xi_;
+  const std::unique_ptr<MatrixType> T_minus_one_;
+};
+#endif
 
-  void calculate_scalar_products(const BlockVectorType& beta_in,
-                                 const BasisValuesMatrixType& M,
-                                 TemporaryVectorsType& scalar_products) const
-  {
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      calculate_scalar_products_block(jj, beta_in.block(jj), M[jj], scalar_products[jj]);
-  }
 
-  void apply_exponential(TemporaryVectorType& values) const
-  {
-    assert(values.size() < std::numeric_limits<int>::max());
-    XT::Common::Mkl::exp(static_cast<int>(values.size()), values.data(), values.data());
-  }
+#if 0
+/** Analytical flux \mathbf{f}(\mathbf{u}) = < \mu \mathbf{m} G_{\hat{\alpha}(\mathbf{u})} >,
+ * Simple backtracking Newton without change of basis
+ */
+template <class MomentBasisImp>
+class EntropyBasedFluxImplementation
+  : public EntropyBasedFluxImplementationUnspecializedBase<MomentBasisImp>
+{
+  using BaseType = EntropyBasedFluxImplementationUnspecializedBase<MomentBasisImp>;
+  using ThisType = EntropyBasedFluxImplementation;
 
-  void apply_exponential(TemporaryVectorsType& values) const
-  {
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      apply_exponential(values[jj]);
-  }
+public:
+  using BaseType::basis_dimDomain;
+  using typename BaseType::DomainType;
+  using typename BaseType::MomentBasis;
+  using typename BaseType::RangeFieldType;
+  using typename BaseType::AlphaReturnType;
+  using typename BaseType::VectorType;
+  using typename BaseType::MatrixType;
+  using typename BaseType::BasisValuesMatrixType;
 
-  // calculate ret = \int (exp(beta_in * m))
-  RangeFieldType calculate_scalar_integral(const BlockVectorType& beta_in, const BasisValuesMatrixType& M) const
+  explicit EntropyBasedFluxImplementation(const MomentBasis& basis_functions,
+                                          const RangeFieldType tau,
+                                          const RangeFieldType epsilon_gamma,
+                                          const RangeFieldType chi,
+                                          const RangeFieldType xi,
+                                          const std::vector<RangeFieldType> r_sequence,
+                                          const size_t k_0,
+                                          const size_t k_max,
+                                          const RangeFieldType epsilon)
+    : BaseType(basis_functions, tau, epsilon_gamma, chi, xi, r_sequence, k_0, k_max, epsilon)
   {
-    auto& work_vecs = working_storage();
-    calculate_scalar_products(beta_in, M, work_vecs);
-    apply_exponential(work_vecs);
-    RangeFieldType ret(0.);
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      ret += std::inner_product(
-          quad_weights_[jj].begin(), quad_weights_[jj].end(), work_vecs[jj].begin(), RangeFieldType(0.));
-    return ret;
   }
 
-  // calculate ret = \int (m1 exp(beta_in * m2))
-  void calculate_vector_integral_block(const size_t jj,
-                                       const LocalVectorType& beta_in,
-                                       const XT::LA::CommonDenseMatrix<RangeFieldType>& M1,
-                                       const XT::LA::CommonDenseMatrix<RangeFieldType>& M2,
-                                       LocalVectorType& ret) const
+  // returns (alpha, (actual_u, r)), where r is the regularization parameter and actual_u the regularized u
+  virtual std::unique_ptr<AlphaReturnType>
+  get_alpha(const DomainType& u, const VectorType& alpha_in, const bool regularize) const override final
   {
-    auto& work_vec = working_storage()[jj];
-    calculate_scalar_products_block(jj, beta_in, M2, work_vec);
-    apply_exponential(work_vec);
-    std::fill(ret.begin(), ret.end(), 0.);
-    const size_t num_quad_points = quad_weights_[jj].size();
-    for (size_t ll = 0; ll < num_quad_points; ++ll) {
-      const auto factor = work_vec[ll] * quad_weights_[jj][ll];
-      const auto* basis_ll = M1.get_ptr(ll);
-      for (size_t ii = 0; ii < block_size; ++ii)
-        ret[ii] += basis_ll[ii] * factor;
-    } // ll
-  }
+    auto ret = std::make_unique<AlphaReturnType>();
+    RangeFieldType density = basis_functions_.density(u);
+    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
+    if (!(density > 0.) || std::isinf(density))
+      DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
+    VectorType u_prime = u / density;
+    VectorType alpha_initial = alpha_in - alpha_iso_prime * std::log(density);
+    VectorType v, g_k, d_k, tmp_vec, alpha_prime;
+    RangeFieldType first_error_cond, second_error_cond, tau_prime;
+    auto u_iso = basis_functions_.u_iso();
+    const RangeFieldType dim_factor = is_full_moment_basis<MomentBasis>::value ? 1. : std::sqrt(basis_dimDomain);
+    tau_prime = std::min(tau_ / ((1 + dim_factor * u_prime.two_norm()) * density + dim_factor * tau_), tau_);
+        VectorType alpha_k = alpha_initial;
+        const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
+        const auto r_max = r_sequence.back();
+        for (const auto& r : r_sequence) {
+          // regularize u
+          v = u_prime;
+          if (r > 0) {
+            alpha_k = get_isotropic_alpha(u);
+            VectorType r_times_u_iso = u_iso;
+            r_times_u_iso *= r;
+            v *= 1 - r;
+            v += r_times_u_iso;
+          }
+          // no change of basis in this variant, so v_k is simply a copy of v
+          VectorType v_k = v;
+          // calculate f_0
+          RangeFieldType f_k = calculate_scalar_integral(alpha_k, M_);
+          f_k -= alpha_k * v_k;
 
-  // calculate ret = \int (m1 exp(beta_in * m2))
-  void calculate_vector_integral(const BlockVectorType& beta_in,
-                                 const BasisValuesMatrixType& M1,
-                                 const BasisValuesMatrixType& M2,
-                                 BlockVectorType& ret) const
-  {
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      calculate_vector_integral_block(jj, beta_in.block(jj), M1[jj], M2[jj], ret.block(jj));
-  }
+          thread_local auto H = XT::Common::make_unique<MatrixType>(0.);
 
-  void copy_transposed(const LocalMatrixType& T_k, LocalMatrixType& T_k_trans) const
-  {
-    for (size_t ii = 0; ii < block_size; ++ii)
-      for (size_t kk = 0; kk <= ii; ++kk)
-        T_k_trans[kk][ii] = T_k[ii][kk];
-  }
+          int pure_newton = 0;
+          for (size_t kk = 0; kk < k_max_; ++kk) {
+            // exit inner for loop to increase r if too many iterations are used
+            if (kk > k_0_ && r < r_max)
+              break;
+            // calculate gradient g
+            calculate_vector_integral(alpha_k, M_, M_, g_k);
+            g_k -= v_k;
+            // calculate Hessian H
+            calculate_hessian(alpha_k, M_, *H, true);
+            // calculate descent direction d_k;
+            d_k = g_k;
+            d_k *= -1;
+            try {
+              // if H = LL^T, then we have to calculate d_k = - L^{-T} L^{-1} g_k
+              // calculate H = LL^T first
+              XT::LA::cholesky(*H);
+              // calculate d_tmp = -L^{-1} g_k and store in B
+              XT::LA::solve_lower_triangular(*H, tmp_vec, d_k);
+              // calculate d_k = L^{-T} d_tmp
+              XT::LA::solve_lower_triangular_transposed(*H, d_k, tmp_vec);
+            } catch (const Dune::MathError&) {
+              if (r < r_max)
+                break;
+              const std::string err_msg =
+            "Failed to converge for " + XT::Common::to_string(u) + " with density " + XT::Common::to_string(density);
+              DUNE_THROW(MathError, err_msg);
+            }
 
-  void apply_inverse_matrix_block(const size_t jj,
-                                  const LocalMatrixType& T_k,
-                                  XT::LA::CommonDenseMatrix<RangeFieldType>& M) const
+            const auto& alpha_tilde = alpha_k;
+            auto& u_alpha_tilde = tmp_vec;
+            u_alpha_tilde = g_k + v;
+            auto density_tilde = basis_functions_.density(u_alpha_tilde);
+            if (!(density_tilde > 0.) || std::isinf(density_tilde))
+              break;
+            alpha_prime = alpha_tilde - alpha_iso_prime * std::log(density_tilde);
+            auto& u_eps_diff = tmp_vec;
+            calculate_vector_integral(alpha_prime, M_, M_, u_eps_diff);
+            u_eps_diff *= -(1 - epsilon_gamma_);
+            u_eps_diff += v;
+
+            first_error_cond = g_k.two_norm();
+            second_error_cond = std::exp(d_k.one_norm() + std::abs(std::log(density_tilde)));
+            if (first_error_cond < tau_prime && 1 - epsilon_gamma_ < second_error_cond
+                && realizability_helper_.is_realizable(u_eps_diff, kk == static_cast<size_t>(0.8 * k_0_))) {
+              ret->first = alpha_prime + alpha_iso_prime * std::log(density);
+              ret->second = std::make_pair(v * density, r);
+              return ret;
+            } else {
+              RangeFieldType zeta_k = 1;
+              // backtracking line search
+              auto& alpha_new = tmp_vec;
+              while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm()) {
+                // calculate alpha_new = alpha_k + zeta_k d_k
+                alpha_new = d_k;
+                alpha_new *= zeta_k;
+                alpha_new += alpha_k;
+                // calculate f(alpha_new)
+                RangeFieldType f_new = calculate_scalar_integral(alpha_new, M_);
+                f_new -= alpha_new * v_k;
+                if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
+                  alpha_k = alpha_new;
+                  f_k = f_new;
+                  pure_newton = 0;
+                  break;
+                }
+                zeta_k = chi_ * zeta_k;
+              } // backtracking linesearch while
+              // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
+              if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm())
+                ++pure_newton;
+            } // else (stopping conditions)
+          } // k loop (Newton iterations)
+        } // r loop (Regularization parameter)
+        const std::string err_msg =
+            "Failed to converge for " + XT::Common::to_string(u) + " with density " + XT::Common::to_string(density);
+        DUNE_THROW(MathError, err_msg);
+        return ret;
+    }
+
+  private:
+  using BaseType::get_isotropic_alpha;
+  using BaseType::calculate_scalar_integral;
+  using BaseType::calculate_vector_integral;
+  using BaseType::calculate_hessian;
+
+  using BaseType::basis_functions_;
+  using BaseType::quad_points_;
+  using BaseType::quad_weights_;
+  using BaseType::M_;
+  using BaseType::tau_;
+  using BaseType::epsilon_gamma_;
+  using BaseType::chi_;
+  using BaseType::xi_;
+  using BaseType::r_sequence_;
+  using BaseType::k_0_;
+  using BaseType::k_max_;
+  using BaseType::epsilon_;
+  using BaseType::realizability_helper_;
+};
+#endif
+
+
+#if 1
+/**
+ * Specialization of EntropyBasedFluxImplementation for the partial moment (DG) basis PartialMomentBasis
+ */
+template <class D, size_t d, class R, size_t dimRange_or_refinements>
+class EntropyBasedFluxImplementation<PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>>
+  : public XT::Functions::FunctionInterface<PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>::dimRange,
+                                            d,
+                                            PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>::dimRange,
+                                            R>
+{
+public:
+  using MomentBasis = PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>;
+  using BaseType = typename XT::Functions::
+      FunctionInterface<MomentBasis::dimRange, MomentBasis::dimDomain, MomentBasis::dimRange, R>;
+  using ThisType = EntropyBasedFluxImplementation;
+  using BaseType::d;
+  using BaseType::r;
+  static const size_t basis_dimDomain = MomentBasis::dimDomain;
+  static const size_t basis_dimRange = MomentBasis::dimRange;
+  using typename BaseType::DerivativeRangeReturnType;
+  using typename BaseType::DomainFieldType;
+  using typename BaseType::DomainType;
+  using typename BaseType::RangeFieldType;
+  using typename BaseType::RangeReturnType;
+  using typename BaseType::RowDerivativeRangeReturnType;
+  using BasisDomainType = typename MomentBasis::DomainType;
+  static const size_t block_size = (basis_dimDomain == 1) ? 2 : 4; // moments per block: 2 in 1d, 4 otherwise
+  static const size_t num_blocks = basis_dimRange / block_size; // the moment vector is split into equally sized blocks
+  using BlockMatrixType = XT::Common::BlockedFieldMatrix<RangeFieldType, num_blocks, block_size>;
+  using LocalMatrixType = typename BlockMatrixType::BlockType;
+  using BlockVectorType = XT::Common::BlockedFieldVector<RangeFieldType, num_blocks, block_size>;
+  using VectorType = BlockVectorType;
+  using LocalVectorType = typename BlockVectorType::BlockType;
+  using BasisValuesMatrixType = FieldVector<XT::LA::CommonDenseMatrix<RangeFieldType>, num_blocks>; // one per block
+  using QuadraturePointsType = // per block: quadrature points, stored with a 64 byte aligned allocator
+      FieldVector<std::vector<BasisDomainType, boost::alignment::aligned_allocator<BasisDomainType, 64>>, num_blocks>;
+  using QuadratureWeightsType = // per block: quadrature weights, stored with a 64 byte aligned allocator
+      FieldVector<std::vector<RangeFieldType, boost::alignment::aligned_allocator<RangeFieldType, 64>>, num_blocks>;
+  using TemporaryVectorType = std::vector<RangeFieldType, boost::alignment::aligned_allocator<RangeFieldType, 64>>;
+  using TemporaryVectorsType = FieldVector<TemporaryVectorType, num_blocks>; // one work vector per block
+  using AlphaReturnType = std::pair<BlockVectorType, std::pair<DomainType, RangeFieldType>>; // see get_alpha
+
+  explicit EntropyBasedFluxImplementation(const MomentBasis& basis_functions,
+                                          const RangeFieldType tau,
+                                          const RangeFieldType epsilon_gamma,
+                                          const RangeFieldType chi,
+                                          const RangeFieldType xi,
+                                          const std::vector<RangeFieldType> r_sequence,
+                                          const size_t k_0,
+                                          const size_t k_max,
+                                          const RangeFieldType epsilon)
+    : basis_functions_(basis_functions)
+    , M_(XT::LA::CommonDenseMatrix<RangeFieldType>()) // placeholder, the per-block matrices are assigned below
+    , tau_(tau)
+    , epsilon_gamma_(epsilon_gamma)
+    , chi_(chi)
+    , xi_(xi)
+    , r_sequence_(r_sequence)
+    , k_0_(k_0)
+    , k_max_(k_max)
+    , epsilon_(epsilon)
   {
-    const size_t num_quad_points = quad_points_[jj].size();
-    if (block_size == 2) {
-      const auto T_00_inv = 1 / T_k[0][0];
-      const auto T_11_inv = 1 / T_k[1][1];
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        auto* basis_ll = M.get_ptr(ll);
-        basis_ll[0] *= T_00_inv;
-        basis_ll[1] = (basis_ll[1] - T_k[1][0] * basis_ll[0]) * T_11_inv;
-      }
-    } else if (block_size == 4) {
-      FieldVector<RangeFieldType, 4> diag_inv;
-      for (size_t ii = 0; ii < 4; ++ii)
-        diag_inv[ii] = 1. / T_k[ii][ii];
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        auto* basis_ll = M.get_ptr(ll);
-        basis_ll[0] *= diag_inv[0];
-        basis_ll[1] = (basis_ll[1] - T_k[1][0] * basis_ll[0]) * diag_inv[1];
-        basis_ll[2] = (basis_ll[2] - T_k[2][0] * basis_ll[0] - T_k[2][1] * basis_ll[1]) * diag_inv[2];
-        basis_ll[3] =
-            (basis_ll[3] - T_k[3][0] * basis_ll[0] - T_k[3][1] * basis_ll[1] - T_k[3][2] * basis_ll[2]) * diag_inv[3];
+    XT::LA::eye_matrix(T_minus_one_); // presumably sets T_minus_one_ to the identity matrix - TODO confirm
+    helper<basis_dimDomain>::calculate_plane_coefficients(basis_functions_);
+    const auto& quadratures = basis_functions_.quadratures();
+    assert(quadratures.size() == num_blocks); // exactly one quadrature per block is expected
+    for (size_t jj = 0; jj < num_blocks; ++jj) {
+      for (const auto& quad_point : quadratures[jj]) { // copy points and weights of block jj into aligned storage
+        quad_points_[jj].emplace_back(quad_point.position());
+        quad_weights_[jj].emplace_back(quad_point.weight());
       }
-    } else {
-#  if HAVE_MKL || HAVE_CBLAS
-      thread_local LocalMatrixType T_k_trans(0.);
-      assert(num_quad_points < std::numeric_limits<int>::max());
-      // Calculate the transpose here first as this is much faster than passing the matrix to dtrsm and using
-      // CblasTrans
-      copy_transposed(T_k, T_k_trans);
-      XT::Common::Blas::dtrsm(XT::Common::Blas::row_major(),
-                              XT::Common::Blas::right(),
-                              XT::Common::Blas::upper(),
-                              XT::Common::Blas::no_trans(),
-                              XT::Common::Blas::non_unit(),
-                              static_cast<int>(num_quad_points),
-                              block_size,
-                              1.,
-                              &(T_k_trans[0][0]),
-                              block_size,
-                              M.data(),
-                              block_size);
-#  else
-      LocalVectorType tmp_vec, tmp_vec2;
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        std::copy_n(M.get_ptr(ll), block_size, tmp_vec.begin());
-        XT::LA::solve_lower_triangular(T_k, tmp_vec2, tmp_vec);
-        std::copy_n(tmp_vec2.begin(), block_size, M.get_ptr(ll));
+    } // jj
+    for (size_t jj = 0; jj < num_blocks; ++jj) {
+      while (quad_weights_[jj].size() % 8) { // pad to a multiple of 8 entries (64 bytes if entries are 8 bytes wide)
+        quad_points_[jj].push_back(quad_points_[jj].back()); // repeat the last point ...
+        quad_weights_[jj].push_back(0.); // ... with zero weight, so padding adds nothing to weighted sums
       }
-#  endif
-    }
+      M_[jj] = XT::LA::CommonDenseMatrix<RangeFieldType>(quad_points_[jj].size(), block_size, 0., 0);
+      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) { // row ll holds the basis values at quadrature point ll
+        const auto val = basis_functions_.evaluate(quad_points_[jj][ll], jj);
+        for (size_t ii = 0; ii < block_size; ++ii)
+          M_[jj].set_entry(ll, ii, val[block_size * jj + ii]); // val is indexed globally, pick the entries of block jj
+      } // ll
+    } // jj
   }
 
-  void apply_inverse_matrix(const BlockMatrixType& T_k, BasisValuesMatrixType& M) const
+  virtual int order(const XT::Common::Parameter& /*param*/) const override
   {
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      apply_inverse_matrix_block(jj, T_k.block(jj), M[jj]);
+    return 1; // the reported order is always 1, the parameter is ignored
   }
 
-  template <size_t domainDim = basis_dimDomain, class anything = void>
-  struct helper
+  virtual RangeReturnType evaluate(const DomainType& u,
+                                   const XT::Common::Parameter& /*param*/ = {}) const override final
   {
-    static void jacobian(const BasisValuesMatrixType& M,
-                         BlockMatrixType& H,
-                         DerivativeRangeReturnType& ret,
-                         const ThisType* entropy_flux)
-    {
-      for (size_t dd = 0; dd < domainDim; ++dd)
-        entropy_flux->row_jacobian(dd, M, H, ret[dd], dd > 0);
-    } // void jacobian(...)
-
-#  if HAVE_QHULL
-    static void calculate_plane_coefficients(const BasisfunctionType& basis_functions)
-    {
-      if (!basis_functions.plane_coefficients()[0].size())
-        basis_functions.calculate_plane_coefficients();
-    }
-
-    static bool is_realizable(const BlockVectorType& u, const BasisfunctionType& basis_functions)
-    {
-      for (size_t jj = 0; jj < num_blocks; ++jj)
-        for (const auto& coeff : basis_functions.plane_coefficients()[jj])
-          if (!(u.block(jj) * coeff.first < coeff.second))
-            return false;
-      return true;
-    }
-#  else
-    static void calculate_plane_coefficients(const BasisfunctionType& /*basis_functions*/)
-    {
-      DUNE_THROW(Dune::NotImplemented, "You are missing Qhull!");
-    }
-
-    static bool is_realizable(const BlockVectorType& /*u*/, const BasisfunctionType& /*basis_functions*/)
-    {
-      DUNE_THROW(Dune::NotImplemented, "You are missing Qhull!");
-      return false;
-    }
-#  endif
-  }; // class helper<...>
+    const auto alpha = std::make_unique<BlockVectorType>(get_alpha(u, *get_isotropic_alpha(u), true)->first);
+    return evaluate_with_alpha(*alpha); // alpha: first entry of get_alpha(u, isotropic alpha, regularize = true)
+  } // RangeReturnType evaluate(...)
 
-  template <class anything>
-  struct helper<1, anything>
+  virtual RangeReturnType evaluate_with_alpha(const BlockVectorType& alpha) const
   {
-    static void jacobian(const BasisValuesMatrixType& M,
-                         BlockMatrixType& H,
-                         DerivativeRangeReturnType& ret,
-                         const ThisType* entropy_flux)
-    {
-      entropy_flux->row_jacobian(0, M, H, ret, false);
-    } // void jacobian(...)
-
-    static void calculate_plane_coefficients(const BasisfunctionType& /*basis_functions*/) {}
-
-    static bool is_realizable(const BlockVectorType& u, const BasisfunctionType& basis_functions)
-    {
+    RangeReturnType ret(0.);
+    auto& work_vecs = working_storage();
+    calculate_scalar_products(alpha, M_, work_vecs); // presumably work_vecs[jj][ll] = alpha * m(v_ll) - TODO confirm
+    apply_exponential(work_vecs); // presumably replaces each entry by its exponential - TODO confirm
+    for (size_t dd = 0; dd < basis_dimDomain; ++dd) {
+      // calculate ret[dd] = < omega[dd] m G_\alpha(u) > by quadrature, block by block
       for (size_t jj = 0; jj < num_blocks; ++jj) {
-        const auto& u0 = u.block(jj)[0];
-        const auto& u1 = u.block(jj)[1];
-        const auto& v0 = basis_functions.triangulation()[jj];
-        const auto& v1 = basis_functions.triangulation()[jj + 1];
-        bool ret = (u0 >= 0) && (u1 <= v1 * u0) && (v0 * u0 <= u1);
-        if (!ret)
-          return false;
+        const auto offset = block_size * jj; // index of the first entry of block jj in the full moment vector
+        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+          const auto factor = work_vecs[jj][ll] * quad_weights_[jj][ll] * quad_points_[jj][ll][dd];
+          for (size_t ii = 0; ii < block_size; ++ii)
+            ret[dd][offset + ii] += M_[jj].get_entry(ll, ii) * factor;
+        } // ll
       } // jj
-      return true;
-    }
-  }; // class helper<1, ...>
+    } // dd
+    return ret;
+  } // RangeReturnType evaluate_with_alpha(...)
 
-  void row_jacobian(const size_t row,
-                    const BasisValuesMatrixType& M,
-                    BlockMatrixType& H,
-                    RowDerivativeRangeReturnType& ret,
-                    bool L_calculated = false) const
+  virtual DerivativeRangeReturnType jacobian(const DomainType& u,
+                                             const XT::Common::Parameter& /*param*/ = {}) const override final
   {
-    assert(row < basis_dimDomain);
-    calculate_J(M, ret, row);
-    calculate_A_Binv(ret, H, L_calculated);
-  } // void partial_u_col(...)
+    const auto alpha = std::make_unique<BlockVectorType>(get_alpha(u, *get_isotropic_alpha(u), true)->first);
+    return jacobian_with_alpha(*alpha); // alpha: first entry of get_alpha(u, isotropic alpha, regularize = true)
+  } // DerivativeRangeReturnType jacobian(...)
 
-  // calculates A = A B^{-1}. B is assumed to be symmetric positive definite.
-  static void calculate_A_Binv(FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>& A,
-                               BlockMatrixType& B,
-                               bool L_calculated = false)
+  virtual DerivativeRangeReturnType jacobian_with_alpha(const BlockVectorType& alpha) const
   {
-    // if B = LL^T, then we have to calculate ret = A (L^T)^{-1} L^{-1} = C L^{-1}
-    // calculate B = LL^T first
-    if (!L_calculated) {
-      for (size_t jj = 0; jj < num_blocks; ++jj)
-        XT::LA::cholesky(B.block(jj));
-    }
-    FieldVector<RangeFieldType, block_size> tmp_vec;
-    FieldVector<RangeFieldType, block_size> tmp_A_row;
-    for (size_t jj = 0; jj < num_blocks; ++jj) {
-      // calculate C = A (L^T)^{-1}
-      const auto offset = block_size * jj;
-      for (size_t ii = 0; ii < block_size; ++ii) {
-        for (size_t kk = 0; kk < block_size; ++kk)
-          tmp_A_row[kk] = A[offset + ii][offset + kk];
-        XT::LA::solve_lower_triangular(B.block(jj), tmp_vec, tmp_A_row);
-        // calculate ret = C L^{-1}
-        XT::LA::solve_lower_triangular_transposed(B.block(jj), tmp_A_row, tmp_vec);
-        for (size_t kk = 0; kk < block_size; ++kk)
-          A[offset + ii][offset + kk] = tmp_A_row[kk];
-      } // ii
-    } // jj
-  } // void calculate_A_Binv(...)
+    DerivativeRangeReturnType ret;
+    thread_local auto H = XT::Common::make_unique<BlockMatrixType>(); // per-thread buffer, created once and reused
+    calculate_hessian(alpha, M_, *H); // H is (re)computed for the given alpha on every call
+    helper<basis_dimDomain>::jacobian(M_, *H, ret, this); // dimension-dependent helper fills ret using M_ and H
+    return ret;
+  } // DerivativeRangeReturnType jacobian_with_alpha(...)
 
-  void calculate_hessian(const BlockVectorType& alpha, const BasisValuesMatrixType& M, BlockMatrixType& H) const
+  // Kinetic flux \sum_{i=1}^d < v_i m \psi > n_i through the interface between u_i and u_j, where n is the unit outer
+  // normal, m is the basis function vector and \psi is the entropy ansatz. Solves for the multipliers alpha_i, alpha_j
+  // of u_i, u_j via get_alpha and returns the result of evaluate_kinetic_flux_with_alphas.
+  // As we are using cartesian grids, n_i == 0 in all but one dimension, so only evaluate for i == dd
+  DomainType evaluate_kinetic_flux(const DomainType& u_i,
+                                   const DomainType& u_j,
+                                   const BasisDomainType& n_ij,
+                                   const size_t dd) const
   {
-    auto& work_vec = working_storage();
-    calculate_scalar_products(alpha, M, work_vec);
-    apply_exponential(work_vec);
-    // matrix is symmetric, we only use lower triangular part
-    for (size_t jj = 0; jj < num_blocks; ++jj) {
-      std::fill(H.block(jj).begin(), H.block(jj).end(), 0.);
-      const size_t num_quad_points = quad_weights_[jj].size();
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        auto factor_ll = work_vec[jj][ll] * quad_weights_[jj][ll];
-        const auto* basis_ll = M[jj].get_ptr(ll);
-        for (size_t ii = 0; ii < block_size; ++ii) {
-          auto* H_row = &(H.block(jj)[ii][0]);
-          const auto factor_ll_ii = basis_ll[ii] * factor_ll;
-          for (size_t kk = 0; kk <= ii; ++kk)
-            H_row[kk] += basis_ll[kk] * factor_ll_ii;
-        } // ii
-      } // ll
-    } // jj
-  } // void calculate_hessian(...)
+    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
+    const auto alpha_i = std::make_unique<BlockVectorType>(get_alpha(u_i, *get_isotropic_alpha(u_i), true)->first);
+    const auto alpha_j = std::make_unique<BlockVectorType>(get_alpha(u_j, *get_isotropic_alpha(u_j), true)->first);
+    return evaluate_kinetic_flux_with_alphas(*alpha_i, *alpha_j, n_ij, dd);
+  } // DomainType evaluate_kinetic_flux(...)
 
-  // J = df/dalpha is the derivative of the flux with respect to alpha.
-  // As F = (f_1, f_2, f_3) is matrix-valued
-  // (div f = \sum_{i=1}^d \partial_{x_i} f_i  = \sum_{i=1}^d \partial_{x_i} < v_i m \hat{psi}(alpha) > is
-  // vector-valued),
-  // the derivative is the vector of matrices (df_1/dalpha, df_2/dalpha, ...)
-  // this function returns the dd-th matrix df_dd/dalpha of J
-  // assumes work_vecs already contains the needed exp(alpha * m) values
-  void calculate_J(const BasisValuesMatrixType& M,
-                   Dune::FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>& J_dd,
-                   const size_t dd) const
+  DomainType evaluate_kinetic_flux_with_alphas(const BlockVectorType& alpha_i,
+                                               const BlockVectorType& alpha_j,
+                                               const BasisDomainType& n_ij,
+                                               const size_t dd) const
   {
-    assert(dd < basis_dimDomain);
-    std::fill(J_dd.begin(), J_dd.end(), 0.);
-    const auto& work_vec = working_storage();
-    // matrix is symmetric, we only use lower triangular part
+    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i, only the dd-th summand is evaluated
+    thread_local FieldVector<TemporaryVectorsType, 2> work_vecs; // [0]: data for alpha_i, [1]: data for alpha_j
     for (size_t jj = 0; jj < num_blocks; ++jj) {
-      const auto offset = jj * block_size;
-      const size_t num_quad_points = quad_weights_[jj].size();
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        auto factor_ll = work_vec[jj][ll] * quad_weights_[jj][ll] * quad_points_[jj][ll][dd];
-        const auto* basis_ll = M[jj].get_ptr(ll);
-        for (size_t ii = 0; ii < block_size; ++ii) {
-          auto* J_row = &(J_dd[offset + ii][0]);
-          const auto factor_ll_ii = basis_ll[ii] * factor_ll;
-          for (size_t kk = 0; kk <= ii; ++kk)
-            J_row[offset + kk] += basis_ll[kk] * factor_ll_ii;
-        } // ii
-      } // ll
-    } // jj
-    // symmetric update for upper triangular part of J
+      work_vecs[0][jj].resize(quad_points_[jj].size()); // one entry per stored quadrature point of block jj
+      work_vecs[1][jj].resize(quad_points_[jj].size());
+    }
+    calculate_scalar_products(alpha_i, M_, work_vecs[0]); // exponential is applied pointwise in the loop below
+    calculate_scalar_products(alpha_j, M_, work_vecs[1]);
+    DomainType ret(0);
     for (size_t jj = 0; jj < num_blocks; ++jj) {
       const auto offset = block_size * jj;
-      for (size_t mm = 0; mm < block_size; ++mm)
-        for (size_t nn = mm + 1; nn < block_size; ++nn)
-          J_dd[offset + mm][offset + nn] = J_dd[offset + nn][offset + mm];
-    }
-  } // void calculate_J(...)
+      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll) {
+        const auto position = quad_points_[jj][ll][dd]; // dd-th component of the ll-th quadrature point
+        RangeFieldType factor = // use alpha_i where position * n_ij[dd] > 0 and alpha_j otherwise
+            position * n_ij[dd] > 0. ? std::exp(work_vecs[0][jj][ll]) : std::exp(work_vecs[1][jj][ll]);
+        factor *= quad_weights_[jj][ll] * position;
+        for (size_t ii = 0; ii < block_size; ++ii)
+          ret[offset + ii] += M_[jj].get_entry(ll, ii) * factor;
+      } // ll
+    } // jj
+    ret *= n_ij[dd];
+    return ret;
+  } // DomainType evaluate_kinetic_flux_with_alphas(...)
 
-  void change_basis(const BlockVectorType& beta_in,
-                    BlockVectorType& v_k,
-                    BasisValuesMatrixType& P_k,
-                    BlockMatrixType& T_k,
-                    BlockVectorType& g_k,
-                    BlockVectorType& beta_out,
-                    BlockMatrixType& H) const
+  const MomentBasis& basis_functions() const
   {
-    calculate_hessian(beta_in, P_k, H);
-    FieldVector<RangeFieldType, block_size> tmp_vec;
-    for (size_t jj = 0; jj < num_blocks; ++jj)
-      XT::LA::cholesky(H.block(jj));
-    const auto& L = H;
-    T_k.rightmultiply(L);
-    L.mtv(beta_in, beta_out);
-    for (size_t jj = 0; jj < num_blocks; ++jj) {
-      XT::LA::solve_lower_triangular(L.block(jj), tmp_vec, v_k.block(jj));
-      v_k.block(jj) = tmp_vec;
-    } // jj
-    apply_inverse_matrix(L, P_k);
-    calculate_vector_integral(beta_out, P_k, P_k, g_k);
-    g_k -= v_k;
-  } // void change_basis(...)
+    return basis_functions_; // read-only access to the moment basis this flux was constructed with
+  } // const MomentBasis& basis_functions()
 
-  const BasisfunctionType& basis_functions_;
-  QuadraturePointsType quad_points_;
-  QuadratureWeightsType quad_weights_;
-  BasisValuesMatrixType M_;
-  const RangeFieldType tau_;
-  const RangeFieldType epsilon_gamma_;
-  const RangeFieldType chi_;
-  const RangeFieldType xi_;
-  const std::vector<RangeFieldType> r_sequence_;
-  const size_t k_0_;
-  const size_t k_max_;
-  const RangeFieldType epsilon_;
-  LocalMatrixType T_minus_one_;
-  const std::string name_;
-};
+  std::unique_ptr<AlphaReturnType>
+  get_alpha(const DomainType& u, const VectorType& alpha_in, const bool regularize) const
+  {
+    auto ret = std::make_unique<AlphaReturnType>();
 
-template <class D, size_t d, class R, size_t dimRange_or_refinements>
-const size_t EntropyBasedLocalFlux<PartialMomentBasis<D, d, R, dimRange_or_refinements, 1>>::cache_size;
-#endif
+    // rescale u such that the density <psi> is 1
+    RangeFieldType density = basis_functions_.density(u);
+    static const auto alpha_iso_prime = std::make_unique<BlockVectorType>(basis_functions_.alpha_iso_prime());
+    auto alpha_initial = std::make_unique<BlockVectorType>(*alpha_iso_prime);
+    *alpha_initial *= -std::log(density);
+    *alpha_initial += alpha_in;
+    if (!(density > 0. || !(basis_functions_.min_density(u) > 0.)) || std::isinf(density))
+      DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
+    auto u_prime = std::make_unique<const BlockVectorType>(u / density);
 
+    // if value has already been calculated for these values, skip computation
+    RangeFieldType tau_prime = std::min(
+        tau_ / ((1 + std::sqrt(basis_dimRange) * u_prime->two_norm()) * density + std::sqrt(basis_dimRange) * tau_),
+        tau_);
 
-#if 0
-/**
- * Specialization of EntropyBasedLocalFlux for 3D Hatfunctions
- */
-template <class D, class R, size_t dimRange_or_refinements>
-class EntropyBasedLocalFlux<HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>>
-  : public XT::Functions::FunctionInterface<HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>::dimRange,
-                                            3,
-                                            HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>::dimRange,
-                                            R>
-{
-public:
-  using BasisfunctionType = HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>;
-  using BaseType = typename XT::Functions::
-      FunctionInterface<BasisfunctionType::dimRange, BasisfunctionType::dimDomain, BasisfunctionType::dimRange, R>;
-  using ThisType = EntropyBasedLocalFlux;
-  using BaseType::d;
-  using BaseType::r;
-  static const size_t basis_dimDomain = BasisfunctionType::dimDomain;
-  static const size_t basis_dimRange = BasisfunctionType::dimRange;
-  using typename BaseType::DerivativeRangeReturnType;
-  using typename BaseType::DomainFieldType;
-  using typename BaseType::DomainType;
-  using typename BaseType::RangeFieldType;
-  using typename BaseType::RangeReturnType;
-  using typename BaseType::RowDerivativeRangeReturnType;
-  using BasisDomainType = typename BasisfunctionType::DomainType;
-  using MatrixType = XT::Common::FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>;
-  using DynamicRangeType = DynamicVector<RangeFieldType>;
-  using LocalVectorType = XT::Common::FieldVector<RangeFieldType, 3>;
-  using LocalMatrixType = XT::Common::FieldMatrix<RangeFieldType, 3, 3>;
-  using BasisValuesMatrixType = std::vector<std::vector<LocalVectorType>>;
-  using QuadraturePointsType = std::vector<std::vector<BasisDomainType>>;
-  using QuadratureWeightsType = std::vector<std::vector<RangeFieldType>>;
-#  if HAVE_EIGEN
-  using SparseMatrixType = typename XT::LA::Container<RangeFieldType, XT::LA::Backends::eigen_sparse>::MatrixType;
-  using VectorType = typename XT::LA::Container<RangeFieldType, XT::LA::Backends::eigen_sparse>::VectorType;
-#  else
-  using SparseMatrixType = typename XT::LA::Container<RangeFieldType, XT::LA::default_sparse_backend>::MatrixType;
-  using VectorType = typename XT::LA::Container<RangeFieldType, XT::LA::default_sparse_backend>::VectorType;
-#  endif
-  using AlphaReturnType = std::pair<VectorType, std::pair<DomainType, RangeFieldType>>;
-  static const size_t cache_size = 4 * basis_dimDomain + 2;
+    // calculate moment vector for isotropic distribution
+    auto u_iso = std::make_unique<const BlockVectorType>(basis_functions_.u_iso());
 
-  explicit EntropyBasedLocalFlux(
-      const BasisfunctionType& basis_functions,
-      const RangeFieldType tau = 1e-9,
-      const RangeFieldType epsilon_gamma = 0.01,
-      const RangeFieldType chi = 0.5,
-      const RangeFieldType xi = 1e-3,
-      const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
-      const size_t k_0 = 500,
-      const size_t k_max = 1000,
-      const RangeFieldType epsilon = std::pow(2, -52),
-      const std::string name = "")
-    : basis_functions_(basis_functions)
-    , quad_points_(basis_functions_.triangulation().faces().size())
-    , quad_weights_(basis_functions_.triangulation().faces().size())
-    , M_(basis_functions_.triangulation().faces().size())
-    , tau_(tau)
-    , epsilon_gamma_(epsilon_gamma)
-    , chi_(chi)
-    , xi_(xi)
-    , r_sequence_(r_sequence)
-    , k_0_(k_0)
-    , k_max_(k_max)
-    , epsilon_(epsilon)
-    , name_(name)
-    , cache_(cache_size)
-  {
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& vertices = triangulation.vertices();
-    const auto& faces = triangulation.faces();
-    assert(vertices.size() == basis_dimRange);
-    // create pattern
-    XT::LA::SparsityPatternDefault pattern(basis_dimRange);
-    for (size_t vertex_index = 0; vertex_index < basis_dimRange; ++vertex_index) {
-      const auto& vertex = vertices[vertex_index];
-      const auto& adjacent_faces = triangulation.get_face_indices(vertex->position());
-      for (const auto& face_index : adjacent_faces) {
-        const auto& face = faces[face_index];
-        assert(face->vertices().size() == 3);
-        for (size_t jj = 0; jj < 3; ++jj)
-          pattern.insert(vertex_index, face->vertices()[jj]->index());
-      }
-    }
-    pattern.sort();
-    pattern_ = pattern;
-    // store basis evaluations
-    const auto& quadratures = basis_functions_.quadratures();
-    assert(quadratures.size() == faces.size());
-    for (size_t jj = 0; jj < faces.size(); ++jj) {
-      for (const auto& quad_point : quadratures[jj]) {
-        quad_points_[jj].emplace_back(quad_point.position());
-        quad_weights_[jj].emplace_back(quad_point.weight());
-      }
-    } // jj
-    for (size_t jj = 0; jj < faces.size(); ++jj) {
-      M_[jj] = std::vector<LocalVectorType>(quad_points_[jj].size());
-      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll)
-        M_[jj][ll] = basis_functions_.evaluate_on_face(quad_points_[jj][ll], jj);
-    } // jj
-  } // constructor
-
-  virtual int order(const XT::Common::Parameter& /*param*/ = {}) const override
-  {
-    return 1;
-  }
-
-  static std::string static_id()
-  {
-    return "gdt.entropybasedflux";
-  }
-
-  virtual RangeReturnType evaluate(const DomainType& u, const XT::Common::Parameter& param = {}) const override final
-  {
-    RangeReturnType ret(0.);
-    const auto alpha = get_alpha(u, param, true)->first;
-    LocalVectorType local_alpha, local_ret;
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& faces = triangulation.faces();
-    for (size_t dd = 0; dd < basis_dimDomain; ++dd) {
-      // calculate ret[dd] = < omega[dd] m G_\alpha(u) >
-      for (size_t jj = 0; jj < faces.size(); ++jj) {
-        local_ret *= 0.;
-        const auto& face = faces[jj];
-        const auto& vertices = face->vertices();
-        for (size_t ii = 0; ii < 3; ++ii)
-          local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
-        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-          const auto& basis_ll = M_[jj][ll];
-          auto factor_ll = std::exp(local_alpha * basis_ll) * quad_points_[jj][ll][dd] * quad_weights_[jj][ll];
-          for (size_t ii = 0; ii < 3; ++ii)
-            local_ret[ii] += basis_ll[ii] * factor_ll;
-        } // ll (quad points)
-        for (size_t ii = 0; ii < 3; ++ii)
-          ret[dd][vertices[ii]->index()] += local_ret[ii];
-      } // jj (faces)
-    } // dd
-    return ret;
-  } // void evaluate(...)
-
-  virtual DerivativeRangeReturnType jacobian(const DomainType& u,
-                                             const XT::Common::Parameter& param = {}) const override final
-  {
-    DerivativeRangeReturnType ret;
-    const auto alpha = get_alpha(u, param, true)->first;
-    thread_local SparseMatrixType H(basis_dimRange, basis_dimRange, pattern_, 0);
-    thread_local SparseMatrixType J(basis_dimRange, basis_dimRange, pattern_, 0);
-    calculate_hessian(alpha, M_, H);
-    for (size_t dd = 0; dd < basis_dimDomain; ++dd) {
-      calculate_J(M_, J, dd);
-      calculate_J_Hinv(J, H, ret[dd]);
-    }
-    return ret;
-  } // ... jacobian(...)
+    // define further variables
+    auto g_k = std::make_unique<BlockVectorType>();
+    auto beta_out = std::make_unique<BlockVectorType>();
+    auto v = std::make_unique<BlockVectorType>();
+    thread_local auto T_k = XT::Common::make_unique<BlockMatrixType>();
+    auto beta_in = std::make_unique<BlockVectorType>(*alpha_initial);
 
-  // calculate \sum_{i=1}^d < v_i m \psi > n_i, where n is the unit outer normal,
-  // m is the basis function vector, phi_u is the ansatz corresponding to u
-  // and x, v, t are the space, velocity and time variable, respectively
-  // As we are using cartesian grids, n_i == 0 in all but one dimension, so only evaluate for i == dd
-  DomainType evaluate_kinetic_flux(const DomainType& u_i,
-                                   const DomainType& u_j,
-                                   const BasisDomainType& n_ij,
-                                   const size_t dd,
-                                   const XT::Common::Parameter& param) const
-  {
-    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
-    const auto alpha_i = get_alpha(u_i, param, true)->first;
-    const auto alpha_j = get_alpha(u_j, param, true)->first;
-    DomainType ret(0);
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& faces = triangulation.faces();
-    LocalVectorType local_alpha_i, local_alpha_j, local_ret;
-    for (size_t jj = 0; jj < faces.size(); ++jj) {
-      local_ret *= 0.;
-      const auto& face = faces[jj];
-      const auto& vertices = face->vertices();
-      for (size_t ii = 0; ii < 3; ++ii) {
-        local_alpha_i[ii] = alpha_i.get_entry(vertices[ii]->index());
-        local_alpha_j[ii] = alpha_j.get_entry(vertices[ii]->index());
+    const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
+    const auto r_max = r_sequence.back();
+    for (const auto& r : r_sequence) {
+      // regularize u
+      *v = *u_prime;
+      if (r > 0.) {
+        *beta_in = *get_isotropic_alpha(u);
+        // calculate v = (1-r) u + r u_iso
+        // use beta_out as storage for u_iso_in * r
+        *v *= (1 - r);
+        *beta_out = *u_iso;
+        *beta_out *= r;
+        *v += *beta_out;
       }
-      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-        const auto& basis_ll = M_[jj][ll];
-        const auto position = quad_points_[jj][ll][dd];
-        RangeFieldType factor =
-            position * n_ij[dd] > 0. ? std::exp(local_alpha_i * basis_ll) : std::exp(local_alpha_j * basis_ll);
-        factor *= quad_weights_[jj][ll] * position;
-        for (size_t ii = 0; ii < 3; ++ii)
-          local_ret[ii] += basis_ll[ii] * factor;
-      } // ll (quad points)
-      for (size_t ii = 0; ii < 3; ++ii)
-        ret[vertices[ii]->index()] += local_ret[ii];
-    } // jj (faces)
-    ret *= n_ij[dd];
-    return ret;
-  } // DomainType evaluate_kinetic_flux(...)
-
-  const BasisfunctionType& basis_functions() const
-  {
-    return basis_functions_;
-  }
-
-  std::unique_ptr<AlphaReturnType>
-  get_alpha(const DomainType& u, const XT::Common::Parameter& /*param*/, const bool regularize) const
-  {
-    // get initial multiplier and basis matrix from last time step
-    auto ret = std::make_unique<AlphaReturnType>();
-
-    // rescale u such that the density <psi> is 1
-    RangeFieldType density = basis_functions_.density(u);
-    if (!(density > 0.) || std::isinf(density))
-      DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
+      for (size_t jj = 0; jj < num_blocks; ++jj)
+        T_k->block(jj) = T_minus_one_;
+      // calculate T_k u
+      auto v_k = std::make_unique<BlockVectorType>(*v);
+      // calculate values of basis p = S_k m
+      thread_local BasisValuesMatrixType P_k(XT::LA::CommonDenseMatrix<RangeFieldType>(0, 0, 0., 0));
+      copy_basis_matrix(M_, P_k);
+      // calculate f_0
+      RangeFieldType f_k = calculate_scalar_integral(*beta_in, P_k) - *beta_in * *v_k;
 
-    VectorType u_prime(basis_dimRange, 0., 0);
-    for (size_t ii = 0; ii < basis_dimRange; ++ii)
-      u_prime.set_entry(ii, u[ii] / density);
-    VectorType alpha_iso(basis_dimRange, 0., 0);
-    basis_functions_.alpha_iso(alpha_iso);
+      thread_local auto H = XT::Common::make_unique<BlockMatrixType>(0.);
 
-    // if value has already been calculated for these values, skip computation
-    const auto cache_iterator = cache_->find_closest(u_prime);
-    if (cache_iterator != cache_->end() && XT::Common::FloatCmp::eq(cache_iterator->first, u_prime, 1e-14, 1e-14)) {
-      const auto& alpha_prime = cache_iterator->second;
-      ret->first = alpha_iso;
-      ret->first *= std::log(density);
-      ret->first += alpha_prime;
-      ret->second = std::make_pair(u, 0.);
-      return ret;
-    } else {
-      RangeFieldType tau_prime = std::min(
-          tau_ / ((1 + std::sqrt(basis_dimRange) * u_prime.l2_norm()) * density + std::sqrt(basis_dimRange) * tau_),
-          tau_);
-      thread_local SparseMatrixType H(basis_dimRange, basis_dimRange, pattern_, 0);
-      thread_local auto solver = XT::LA::make_solver(H);
-
-      // calculate moment vector for isotropic distribution
-      VectorType u_iso(basis_dimRange, 0., 0);
-      basis_functions_.u_iso(u_iso);
-      VectorType alpha_k = cache_iterator != cache_->end() ? cache_iterator->second : alpha_iso;
-      VectorType v(basis_dimRange, 0., 0), g_k(basis_dimRange, 0., 0), d_k(basis_dimRange, 0., 0),
-          tmp_vec(basis_dimRange, 0., 0), alpha_prime(basis_dimRange);
-      const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
-      const auto r_max = r_sequence.back();
-      for (const auto& r : r_sequence_) {
-        // regularize u
-        v = u_prime;
-        if (r > 0) {
-          alpha_k = alpha_iso;
-          tmp_vec = u_iso;
-          tmp_vec *= r;
-          v *= 1 - r;
-          v += tmp_vec;
+      int pure_newton = 0;
+      for (size_t kk = 0; kk < k_max_; ++kk) {
+        // exit inner for loop to increase r if too many iterations are used or cholesky decomposition fails
+        if (kk > k_0_ && r < r_max)
+          break;
+        try {
+          change_basis(*beta_in, *v_k, P_k, *T_k, *g_k, *beta_out, *H);
+        } catch (const Dune::MathError&) {
+          if (r < r_max)
+            break;
+          DUNE_THROW(Dune::MathError, "Failure to converge!");
         }
 
-        // calculate f_0
-        RangeFieldType f_k = calculate_f(alpha_k, v);
+        // calculate descent direction d_k;
+        thread_local auto d_k = std::make_unique<BlockVectorType>();
+        *d_k = *g_k;
+        *d_k *= -1;
 
-        int pure_newton = 0;
-        for (size_t kk = 0; kk < k_max_; ++kk) {
-          // exit inner for loop to increase r if too many iterations are used
-          if (kk > k_0_ && r < r_max)
-            break;
-          // calculate gradient g
-          calculate_gradient(alpha_k, v, g_k);
-          // calculate Hessian H
-          calculate_hessian(alpha_k, M_, H, true);
-          // calculate descent direction d_k;
-          tmp_vec = g_k;
-          tmp_vec *= -1;
-          try {
-            solver.apply(tmp_vec, d_k);
-          } catch (const XT::LA::Exceptions::linear_solver_failed& error) {
-            if (r < r_max) {
+        // Calculate stopping criteria (in original basis). Variables with _k are in current basis, without k in
+        // original basis.
+        thread_local auto alpha_tilde = std::make_unique<BlockVectorType>();
+        thread_local auto alpha_prime = std::make_unique<BlockVectorType>();
+        thread_local auto u_alpha_tilde = std::make_unique<BlockVectorType>();
+        thread_local auto u_alpha_prime = std::make_unique<BlockVectorType>();
+        thread_local auto d_alpha_tilde = std::make_unique<BlockVectorType>();
+        thread_local auto g_alpha_tilde = std::make_unique<BlockVectorType>();
+        thread_local auto u_eps_diff = std::make_unique<BlockVectorType>();
+        // convert everything to original basis
+        for (size_t jj = 0; jj < num_blocks; ++jj) {
+          XT::LA::solve_lower_triangular_transposed(T_k->block(jj), alpha_tilde->block(jj), beta_out->block(jj));
+          XT::LA::solve_lower_triangular_transposed(T_k->block(jj), d_alpha_tilde->block(jj), d_k->block(jj));
+        } // jj
+        calculate_vector_integral(*alpha_tilde, M_, M_, *u_alpha_tilde);
+        *g_alpha_tilde = *u_alpha_tilde;
+        *g_alpha_tilde -= *v;
+        auto density_tilde = basis_functions_.density(*u_alpha_tilde);
+        if (!(density_tilde > 0.) || !(basis_functions_.min_density(*u_alpha_tilde) > 0.) || std::isinf(density_tilde))
+          break;
+        *alpha_prime = *alpha_iso_prime;
+        *alpha_prime *= -std::log(density_tilde);
+        *alpha_prime += *alpha_tilde;
+        calculate_vector_integral(*alpha_prime, M_, M_, *u_alpha_prime);
+        *u_eps_diff = *u_alpha_prime;
+        *u_eps_diff *= -(1 - epsilon_gamma_);
+        *u_eps_diff += *v;
+        if (g_alpha_tilde->two_norm() < tau_prime
+            && 1 - epsilon_gamma_ < std::exp(d_alpha_tilde->one_norm() + std::abs(std::log(density_tilde)))
+            && helper<basis_dimDomain>::is_realizable(*u_eps_diff, basis_functions_)) {
+          ret->first = *alpha_iso_prime;
+          ret->first *= std::log(density);
+          ret->first += *alpha_prime;
+          ret->second.first = *v;
+          ret->second.first *= density;
+          ret->second.second = r;
+          return ret;
+        } else {
+          RangeFieldType zeta_k = 1;
+          *beta_in = *beta_out;
+          // backtracking line search
+          while (pure_newton >= 2 || zeta_k > epsilon_ * beta_out->two_norm() / d_k->two_norm()) {
+            thread_local auto beta_new = std::make_unique<BlockVectorType>();
+            *beta_new = *d_k;
+            *beta_new *= zeta_k;
+            *beta_new += *beta_out;
+            RangeFieldType f = calculate_scalar_integral(*beta_new, P_k) - *beta_new * *v_k;
+            if (pure_newton >= 2 || f <= f_k + xi_ * zeta_k * (*g_k * *d_k)) {
+              *beta_in = *beta_new;
+              f_k = f;
+              pure_newton = 0;
               break;
-            } else {
-              DUNE_THROW(XT::LA::Exceptions::linear_solver_failed,
-                         "Failure to converge, solver error was: " << error.what());
             }
-          }
+            zeta_k = chi_ * zeta_k;
+          } // backtracking linesearch while
+          if (zeta_k <= epsilon_ * beta_out->two_norm() / d_k->two_norm())
+            ++pure_newton;
+        } // else (stopping conditions)
+      } // k loop (Newton iterations)
+    } // r loop (Regularization parameter)
+    DUNE_THROW(MathError, "Failed to converge");
 
-          const auto& alpha_tilde = alpha_k;
-          auto& u_alpha_tilde = tmp_vec;
-          u_alpha_tilde = g_k;
-          u_alpha_tilde += v;
-          auto density_tilde = basis_functions_.density(u_alpha_tilde);
-          if (!(density_tilde > 0.) || std::isinf(density_tilde))
-            break;
-          alpha_prime = alpha_iso;
-          alpha_prime *= -std::log(density_tilde);
-          alpha_prime += alpha_tilde;
-          auto& u_eps_diff = tmp_vec;
-          calculate_u(alpha_prime, u_eps_diff); // store u_alpha_prime in u_eps_diff
-          u_eps_diff *= -(1 - epsilon_gamma_);
-          u_eps_diff += v;
-          // checking realizability is cheap so we do not need the second stopping criterion
-          if (g_k.l2_norm() < tau_prime && is_realizable(u_eps_diff)) {
-            ret->first = alpha_iso;
-            ret->first *= std::log(density);
-            ret->first += alpha_prime;
-            auto v_ret_eig = v * density;
-            DomainType v_ret;
-            for (size_t ii = 0; ii < d; ++ii)
-              v_ret[ii] = v_ret_eig[ii];
-            ret->second = std::make_pair(v_ret, r);
-            cache_->insert(v, alpha_prime);
-            return ret;
-          } else {
-            RangeFieldType zeta_k = 1;
-            // backtracking line search
-            auto& alpha_new = tmp_vec;
-            while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.l2_norm() / d_k.l2_norm()) {
-              // calculate alpha_new = alpha_k + zeta_k d_k
-              alpha_new = d_k;
-              alpha_new *= zeta_k;
-              alpha_new += alpha_k;
-              // calculate f(alpha_new)
-              RangeFieldType f_new = calculate_f(alpha_new, v);
-              if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
-                alpha_k = alpha_new;
-                f_k = f_new;
-                pure_newton = 0;
-                break;
-              }
-              zeta_k = chi_ * zeta_k;
-            } // backtracking linesearch while
-            // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
-            if (zeta_k <= epsilon_ * alpha_k.l2_norm() / d_k.l2_norm())
-              ++pure_newton;
-          } // else (stopping conditions)
-        } // k loop (Newton iterations)
-      } // r loop (Regularization parameter)
-      DUNE_THROW(MathError, "Failed to converge");
-    } // else ( value has not been calculated before )
     return ret;
-  } // ... get_alpha(...)
-
-private:
-  static bool is_realizable(const VectorType& u)
-  {
-    for (const auto& u_i : u)
-      if (!(u_i > 0.) || std::isinf(u_i))
-        return false;
-    return true;
   }
 
+private:
   // temporary vectors to store inner products and exponentials
-  std::vector<std::vector<RangeFieldType>>& get_work_vecs() const
+  TemporaryVectorsType& working_storage() const // hands out the calling thread's scratch buffers
   {
-    thread_local std::vector<std::vector<RangeFieldType>> work_vecs;
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& faces = triangulation.faces();
-    work_vecs.resize(faces.size());
-    for (size_t jj = 0; jj < faces.size(); ++jj)
+    thread_local TemporaryVectorsType work_vecs; // one instance per thread, kept alive between calls
+    for (size_t jj = 0; jj < num_blocks; ++jj) // give each block one entry per quadrature point
       work_vecs[jj].resize(quad_points_[jj].size());
     return work_vecs;
   }
 
-private:
-  // calculates ret = J H^{-1}. H is assumed to be symmetric positive definite, which gives ret^T = H^{-T} J^T =
-  // H^{-1} J^T, so we just have to solve y = H^{-1} x for each row x of J
-  void calculate_J_Hinv(SparseMatrixType& J, const SparseMatrixType& H, RowDerivativeRangeReturnType& ret) const
+  std::unique_ptr<BlockVectorType> get_isotropic_alpha(const DomainType& u) const
   {
-    thread_local VectorType solution(basis_dimRange, 0., 0), tmp_rhs(basis_dimRange, 0., 0);
-#  if HAVE_EIGEN
-    typedef ::Eigen::SparseMatrix<RangeFieldType, ::Eigen::ColMajor> ColMajorBackendType;
-    ColMajorBackendType colmajor_copy(H.backend());
-    colmajor_copy.makeCompressed();
-    typedef ::Eigen::SimplicialLDLT<ColMajorBackendType> SolverType;
-    SolverType solver;
-    solver.analyzePattern(colmajor_copy);
-    solver.factorize(colmajor_copy);
-#  else // HAVE_EIGEN
-    auto solver = XT::LA::make_solver(H);
-#  endif // HAVE_EIGEN
-    for (size_t ii = 0; ii < basis_dimRange; ++ii) {
-      // copy row to VectorType
-      for (size_t kk = 0; kk < basis_dimRange; ++kk)
-        tmp_rhs.set_entry(kk, J.get_entry(ii, kk));
-        // solve
-#  if HAVE_EIGEN
-      solution.backend() = solver.solve(tmp_rhs.backend());
-#  else // HAVE_EIGEN
-      solver.apply(tmp_rhs, solution);
-#  endif
-      // copy result to C
-      for (size_t kk = 0; kk < basis_dimRange; ++kk)
-        ret[ii][kk] = solution.get_entry(kk);
-    }
-  } // void calculate_J_Hinv(...)
-
-  RangeFieldType calculate_f(const VectorType& alpha, const VectorType& v) const
-  {
-    RangeFieldType ret(0.);
-    XT::Common::FieldVector<RangeFieldType, 3> local_alpha;
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& faces = triangulation.faces();
-    for (size_t jj = 0; jj < faces.size(); ++jj) {
-      const auto& face = faces[jj];
-      const auto& vertices = face->vertices();
-      for (size_t ii = 0; ii < 3; ++ii)
-        local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
-      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll)
-        ret += std::exp(local_alpha * M_[jj][ll]) * quad_weights_[jj][ll];
-    } // jj (faces)
-    ret -= alpha * v;
-    return ret;
-  } // void calculate_u(...)
-
-  void calculate_u(const VectorType& alpha, VectorType& u) const
-  {
-    u *= 0.;
-    LocalVectorType local_alpha, local_u;
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& faces = triangulation.faces();
-    auto& work_vecs = get_work_vecs();
-    for (size_t jj = 0; jj < faces.size(); ++jj) {
-      const auto& face = faces[jj];
-      const auto& vertices = face->vertices();
-      local_u *= 0.;
-      for (size_t ii = 0; ii < 3; ++ii)
-        local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
-      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-        const auto& basis_ll = M_[jj][ll];
-        work_vecs[jj][ll] = std::exp(local_alpha * basis_ll) * quad_weights_[jj][ll];
-        for (size_t ii = 0; ii < 3; ++ii)
-          local_u[ii] += basis_ll[ii] * work_vecs[jj][ll];
-      } // ll (quad points)
-      for (size_t ii = 0; ii < 3; ++ii)
-        u.add_to_entry(vertices[ii]->index(), local_u[ii]);
-    } // jj (faces)
-  } // void calculate_u(...)
+    const auto alpha_iso = basis_functions_.alpha_iso(); // queried per call: a function-local static would be
+    const auto alpha_iso_prime = basis_functions_.alpha_iso_prime(); // set once from the first caller's basis
+    return std::make_unique<BlockVectorType>(alpha_iso + alpha_iso_prime * std::log(basis_functions_.density(u)));
+  } // get_isotropic_alpha: alpha_iso + log(density(u)) * alpha_iso_prime
 
-  void calculate_gradient(const VectorType& alpha, const VectorType& v, VectorType& g_k) const
+  void copy_basis_matrix(const BasisValuesMatrixType& source_mat, BasisValuesMatrixType& range_mat) const
   {
-    calculate_u(alpha, g_k);
-    g_k -= v;
+    for (size_t block = 0; block != num_blocks; ++block) // block-wise copy of the underlying storage
+      range_mat[block].backend() = source_mat[block].backend();
   }
 
-  void calculate_hessian(const VectorType& alpha,
-                         const BasisValuesMatrixType& M,
-                         SparseMatrixType& H,
-                         const bool use_work_vecs_results = false) const
+  void calculate_scalar_products_block(const size_t jj, // scalar_products[ll] = beta_in * (row ll of M)
+                                       const LocalVectorType& beta_in,
+                                       const XT::LA::CommonDenseMatrix<RangeFieldType>& M,
+                                       TemporaryVectorType& scalar_products) const
   {
-    H *= 0.;
-    LocalVectorType local_alpha;
-    LocalMatrixType H_local(0.);
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& faces = triangulation.faces();
-    auto& work_vecs = get_work_vecs();
-    for (size_t jj = 0; jj < faces.size(); ++jj) {
-      H_local *= 0.;
-      const auto& face = faces[jj];
-      const auto& vertices = face->vertices();
-      for (size_t ii = 0; ii < 3; ++ii)
-        local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
-      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-        const auto& basis_ll = M[jj][ll];
-        if (!use_work_vecs_results)
-          work_vecs[jj][ll] = std::exp(local_alpha * basis_ll) * quad_weights_[jj][ll];
-        for (size_t ii = 0; ii < 3; ++ii)
-          for (size_t kk = 0; kk < 3; ++kk)
-            H_local[ii][kk] += basis_ll[ii] * basis_ll[kk] * work_vecs[jj][ll];
-      } // ll (quad points)
-      for (size_t ii = 0; ii < 3; ++ii)
-        for (size_t kk = 0; kk < 3; ++kk)
-          H.add_to_entry(vertices[ii]->index(), vertices[kk]->index(), H_local[ii][kk]);
-    } // jj (faces)
-  } // void calculate_hessian(...)
+    const size_t num_quad_points = quad_points_[jj].size(); // jj only selects the quadrature of this block
+    for (size_t ll = 0; ll < num_quad_points; ++ll) {
+      const auto* basis_ll = M.get_ptr(ll); // start of the basis values stored for quadrature point ll
+      scalar_products[ll] = std::inner_product(beta_in.begin(), beta_in.end(), basis_ll, RangeFieldType(0.));
+    } // ll
+  }
 
-  // J = df/dalpha is the derivative of the flux with respect to alpha.
-  // As F = (f_1, f_2, f_3) is matrix-valued
-  // (div f = \sum_{i=1}^d \partial_{x_i} f_i  = \sum_{i=1}^d \partial_{x_i} < v_i m \hat{psi}(alpha) > is
-  // vector-valued),
-  // the derivative is the vector of matrices (df_1/dalpha, df_2/dalpha, ...)
-  // this function returns the dd-th matrix df_dd/dalpha of J
-  // assumes work_vecs already contains the needed exp(alpha * m) values
-  void calculate_J(const BasisValuesMatrixType& M, SparseMatrixType& J_dd, const size_t dd) const
+  void calculate_scalar_products(const BlockVectorType& beta_in, // block-wise driver for the per-block routine
+                                 const BasisValuesMatrixType& M,
+                                 TemporaryVectorsType& scalar_products) const
   {
-    assert(dd < basis_dimDomain);
-    J_dd *= 0.;
-    LocalMatrixType J_local(0.);
-    auto& work_vecs = get_work_vecs();
-    const auto& triangulation = basis_functions_.triangulation();
-    const auto& faces = triangulation.faces();
-    for (size_t jj = 0; jj < faces.size(); ++jj) {
-      J_local *= 0.;
-      const auto& face = faces[jj];
-      const auto& vertices = face->vertices();
-      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-        const auto& basis_ll = M[jj][ll];
-        for (size_t ii = 0; ii < 3; ++ii)
-          for (size_t kk = 0; kk < 3; ++kk)
-            J_local[ii][kk] += basis_ll[ii] * basis_ll[kk] * work_vecs[jj][ll] * quad_points_[jj][ll][dd];
-      } // ll (quad points)
-      for (size_t ii = 0; ii < 3; ++ii)
-        for (size_t kk = 0; kk < 3; ++kk)
-          J_dd.add_to_entry(vertices[ii]->index(), vertices[kk]->index(), J_local[ii][kk]);
-    } // jj (faces)
-  } // void calculate_J(...)
-
-  const BasisfunctionType& basis_functions_;
-  QuadraturePointsType quad_points_;
-  QuadratureWeightsType quad_weights_;
-  BasisValuesMatrixType M_;
-  const RangeFieldType tau_;
-  const RangeFieldType epsilon_gamma_;
-  const RangeFieldType chi_;
-  const RangeFieldType xi_;
-  const std::vector<RangeFieldType> r_sequence_;
-  const size_t k_0_;
-  const size_t k_max_;
-  const RangeFieldType epsilon_;
-  const std::string name_;
-  XT::LA::SparsityPatternDefault pattern_;
-};
-
-template <class D, class R, size_t dimRange_or_refinements>
-const size_t EntropyBasedLocalFlux<HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>>::cache_size;
-#endif
-
-
-template <class KeyVectorType, class ValueVectorType>
-class EntropyLocalCache
-{
-public:
-  using MapType = typename std::map<KeyVectorType, ValueVectorType, XT::Common::VectorLess>;
-  using IteratorType = typename MapType::iterator;
-  using ConstIteratorType = typename MapType::const_iterator;
-  using RangeFieldType = typename XT::Common::VectorAbstraction<KeyVectorType>::ScalarType;
-
-  EntropyLocalCache(const size_t capacity = 0)
-    : capacity_(capacity)
-  {}
+    for (size_t block = 0; block != num_blocks; ++block)
+      calculate_scalar_products_block(block, beta_in.block(block), M[block], scalar_products[block]);
+  }
 
-  void insert(const KeyVectorType& u, const ValueVectorType& alpha)
+  void apply_exponential(TemporaryVectorType& values) const // exponentiates values in place (input == output)
   {
-    cache_.insert(std::make_pair(u, alpha));
-    keys_.push_back(u);
-    if (cache_.size() > capacity_) {
-      cache_.erase(keys_.front());
-      keys_.pop_front();
-    }
+    assert(values.size() <= static_cast<size_t>(std::numeric_limits<int>::max())); // length is passed as int
+    XT::Common::Mkl::exp(static_cast<int>(values.size()), values.data(), values.data());
   }
 
-  std::pair<RangeFieldType, ConstIteratorType> find_closest(const KeyVectorType& u) const
+  void apply_exponential(TemporaryVectorsType& values) const // block-wise driver for the single-vector overload
   {
-    ConstIteratorType ret = cache_.begin();
-    if (ret == end())
-      return std::make_pair(std::numeric_limits<RangeFieldType>::max(), ret);
-    auto diff = u - ret->first;
-    // use infinity_norm as distance
-    RangeFieldType distance = infinity_norm(diff);
-    auto it = ret;
-    while (++it != end()) {
-      if (XT::Common::FloatCmp::eq(distance, 0.))
-        break;
-      diff = u - it->first;
-      RangeFieldType new_distance = infinity_norm(diff);
-      if (new_distance < distance) {
-        distance = new_distance;
-        ret = it;
-      }
-    }
-    return std::make_pair(distance, ret);
+    for (size_t block = 0; block != num_blocks; ++block)
+      apply_exponential(values[block]);
   }
 
-  IteratorType begin()
+  // calculate ret = \int (exp(beta_in * m)), approximated by summing the quadrature rule of every block
+  RangeFieldType calculate_scalar_integral(const BlockVectorType& beta_in, const BasisValuesMatrixType& M) const
   {
-    return cache_.begin();
+    auto& work_vecs = working_storage(); // per-thread scratch buffers, overwritten below
+    calculate_scalar_products(beta_in, M, work_vecs); // work_vecs[jj][ll] = beta_in.block(jj) * (row ll of M[jj])
+    apply_exponential(work_vecs); // exponentiate every entry in place
+    RangeFieldType ret(0.);
+    for (size_t jj = 0; jj < num_blocks; ++jj) // add the weighted sum over the quadrature points of block jj
+      ret += std::inner_product(
+          quad_weights_[jj].begin(), quad_weights_[jj].end(), work_vecs[jj].begin(), RangeFieldType(0.));
+    return ret;
   }
 
-  ConstIteratorType begin() const
+  // calculate ret = \int (m1 exp(beta_in * m2)) for block jj only, using that block's quadrature rule
+  void calculate_vector_integral_block(const size_t jj,
+                                       const LocalVectorType& beta_in,
+                                       const XT::LA::CommonDenseMatrix<RangeFieldType>& M1,
+                                       const XT::LA::CommonDenseMatrix<RangeFieldType>& M2,
+                                       LocalVectorType& ret) const
   {
-    return cache_.begin();
+    auto& work_vec = working_storage()[jj]; // per-thread scratch buffer of block jj
+    calculate_scalar_products_block(jj, beta_in, M2, work_vec); // work_vec[ll] = beta_in * (row ll of M2)
+    apply_exponential(work_vec); // exponentiate in place
+    std::fill(ret.begin(), ret.end(), 0.); // ret is overwritten, not accumulated into
+    const size_t num_quad_points = quad_weights_[jj].size();
+    for (size_t ll = 0; ll < num_quad_points; ++ll) {
+      const auto factor = work_vec[ll] * quad_weights_[jj][ll]; // exponential times quadrature weight
+      const auto* basis_ll = M1.get_ptr(ll); // first of the block_size values of M1 read for point ll
+      for (size_t ii = 0; ii < block_size; ++ii)
+        ret[ii] += basis_ll[ii] * factor;
+    } // ll
   }
 
-  IteratorType end()
+  // calculate ret = \int (m1 exp(beta_in * m2)), one block of ret at a time
+  void calculate_vector_integral(const BlockVectorType& beta_in,
+                                 const BasisValuesMatrixType& M1,
+                                 const BasisValuesMatrixType& M2,
+                                 BlockVectorType& ret) const
   {
-    return cache_.end();
+    for (size_t block = 0; block != num_blocks; ++block)
+      calculate_vector_integral_block(block, beta_in.block(block), M1[block], M2[block], ret.block(block));
   }
 
-  ConstIteratorType end() const
+  void copy_transposed(const LocalMatrixType& T_k, LocalMatrixType& T_k_trans) const
   {
-    return cache_.end();
+    for (size_t col = 0; col < block_size; ++col) // fills the upper triangle of T_k_trans from the lower one of T_k
+      for (size_t row = col; row < block_size; ++row)
+        T_k_trans[col][row] = T_k[row][col];
   }
 
-private:
-  static RangeFieldType infinity_norm(const KeyVectorType& vec)
-  {
-    RangeFieldType ret = std::abs(vec[0]);
-    for (size_t ii = 1; ii < vec.size(); ++ii)
-      ret = std::max(ret, std::abs(vec[ii]));
-    return ret;
+  void apply_inverse_matrix_block(const size_t jj, // overwrites every quadrature-point row m of M by T_k^{-1} m
+                                  const LocalMatrixType& T_k, // only entries on/below the diagonal are read
+                                  XT::LA::CommonDenseMatrix<RangeFieldType>& M) const
+  {
+    const size_t num_quad_points = quad_points_[jj].size();
+    if (block_size == 2) { // hand-unrolled forward substitution
+      const auto T_00_inv = 1 / T_k[0][0];
+      const auto T_11_inv = 1 / T_k[1][1];
+      for (size_t ll = 0; ll < num_quad_points; ++ll) {
+        auto* basis_ll = M.get_ptr(ll);
+        basis_ll[0] *= T_00_inv;
+        basis_ll[1] = (basis_ll[1] - T_k[1][0] * basis_ll[0]) * T_11_inv;
+      }
+    } else if (block_size == 4) { // same forward substitution, unrolled for four entries
+      FieldVector<RangeFieldType, 4> diag_inv;
+      for (size_t ii = 0; ii < 4; ++ii)
+        diag_inv[ii] = 1. / T_k[ii][ii];
+      for (size_t ll = 0; ll < num_quad_points; ++ll) {
+        auto* basis_ll = M.get_ptr(ll);
+        basis_ll[0] *= diag_inv[0];
+        basis_ll[1] = (basis_ll[1] - T_k[1][0] * basis_ll[0]) * diag_inv[1];
+        basis_ll[2] = (basis_ll[2] - T_k[2][0] * basis_ll[0] - T_k[2][1] * basis_ll[1]) * diag_inv[2];
+        basis_ll[3] =
+            (basis_ll[3] - T_k[3][0] * basis_ll[0] - T_k[3][1] * basis_ll[1] - T_k[3][2] * basis_ll[2]) * diag_inv[3];
+      }
+    } else { // any other block size: one triangular solve for all rows if BLAS is available, else row by row
+#  if HAVE_MKL || HAVE_CBLAS
+      thread_local LocalMatrixType T_k_trans(0.);
+      assert(num_quad_points <= static_cast<size_t>(std::numeric_limits<int>::max())); // row count is passed as int
+      // dtrsm solves X * T_k^T = M in place. T_k^T is formed explicitly first because that is much faster than
+      // passing T_k to dtrsm and requesting CblasTrans.
+      copy_transposed(T_k, T_k_trans);
+      XT::Common::Blas::dtrsm(XT::Common::Blas::row_major(),
+                              XT::Common::Blas::right(),
+                              XT::Common::Blas::upper(),
+                              XT::Common::Blas::no_trans(),
+                              XT::Common::Blas::non_unit(),
+                              static_cast<int>(num_quad_points),
+                              block_size,
+                              1.,
+                              &(T_k_trans[0][0]),
+                              block_size,
+                              M.data(),
+                              block_size);
+#  else
+      LocalVectorType tmp_vec, tmp_vec2; // right-hand side and solution of the per-row solve
+      for (size_t ll = 0; ll < num_quad_points; ++ll) {
+        std::copy_n(M.get_ptr(ll), block_size, tmp_vec.begin());
+        XT::LA::solve_lower_triangular(T_k, tmp_vec2, tmp_vec);
+        std::copy_n(tmp_vec2.begin(), block_size, M.get_ptr(ll));
+      }
+#  endif
+    }
   }
 
-  size_t capacity_;
-  MapType cache_;
-  std::list<KeyVectorType> keys_;
-};
-
-
-template <class GridViewImp, class BasisfunctionImp>
-class EntropyBasedFluxFunction
-  : public XT::Functions::FluxFunctionInterface<XT::Grid::extract_entity_t<GridViewImp>,
-                                                BasisfunctionImp::dimRange,
-                                                BasisfunctionImp::dimDomain,
-                                                BasisfunctionImp::dimRange,
-                                                typename BasisfunctionImp::R>
-{
-  using BaseType = typename XT::Functions::FluxFunctionInterface<XT::Grid::extract_entity_t<GridViewImp>,
-                                                                 BasisfunctionImp::dimRange,
-                                                                 BasisfunctionImp::dimDomain,
-                                                                 BasisfunctionImp::dimRange,
-                                                                 typename BasisfunctionImp::R>;
-  using ThisType = EntropyBasedFluxFunction;
-
-public:
-  using GridViewType = GridViewImp;
-  using BasisfunctionType = BasisfunctionImp;
-  using IndexSetType = typename GridViewType::IndexSet;
-  static const size_t basis_dimDomain = BasisfunctionType::dimDomain;
-  static const size_t basis_dimRange = BasisfunctionType::dimRange;
-  using typename BaseType::DomainType;
-  using typename BaseType::E;
-  using typename BaseType::LocalFunctionType;
-  using typename BaseType::RangeFieldType;
-  using typename BaseType::StateType;
-  using ImplementationType = EntropyBasedLocalFlux<BasisfunctionType>;
-  using AlphaReturnType = typename ImplementationType::AlphaReturnType;
-  using VectorType = typename ImplementationType::VectorType;
-  using LocalCacheType = EntropyLocalCache<StateType, VectorType>;
-  static const size_t cache_size = 4 * basis_dimDomain + 2;
-
-  explicit EntropyBasedFluxFunction(
-      const GridViewType& grid_view,
-      const BasisfunctionType& basis_functions,
-      const RangeFieldType tau = 1e-9,
-      const RangeFieldType epsilon_gamma = 0.01,
-      const RangeFieldType chi = 0.5,
-      const RangeFieldType xi = 1e-3,
-      const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
-      const size_t k_0 = 500,
-      const size_t k_max = 1000,
-      const RangeFieldType epsilon = std::pow(2, -52),
-      const std::string name = static_id())
-    : index_set_(grid_view.indexSet())
-    , entity_caches_(index_set_.size(0), LocalCacheType(cache_size))
-    , mutexes_(index_set_.size(0))
-    , implementation_(basis_functions, tau, epsilon_gamma, chi, xi, r_sequence, k_0, k_max, epsilon, name)
-  {}
-
-  static const constexpr bool available = true;
-
-  class Localfunction : public LocalFunctionType
+  void apply_inverse_matrix(const BlockMatrixType& T_k, BasisValuesMatrixType& M) const
   {
-    using BaseType = LocalFunctionType;
-
-  public:
-    using typename BaseType::E;
-    using typename BaseType::JacobianRangeReturnType;
-    using typename BaseType::RangeReturnType;
-
-    Localfunction(const IndexSetType& index_set,
-                  std::vector<LocalCacheType>& entity_caches,
-                  std::vector<std::mutex>& mutexes,
-                  const ImplementationType& implementation)
-      : index_set_(index_set)
-      , thread_cache_(cache_size)
-      , entity_caches_(entity_caches)
-      , mutexes_(mutexes)
-      , implementation_(implementation)
-    {}
+    for (size_t jj = 0; jj < num_blocks; ++jj)
+      apply_inverse_matrix_block(jj, T_k.block(jj), M[jj]);
+  }
 
-    virtual void post_bind(const E& element) override final
+  template <size_t domainDim = basis_dimDomain, class anything = void>
+  struct helper
+  {
+    static void jacobian(const BasisValuesMatrixType& M,
+                         BlockMatrixType& H,
+                         DerivativeRangeReturnType& ret,
+                         const ThisType* entropy_flux)
     {
-      const auto index = index_set_.index(element);
-      entity_cache_ = &(entity_caches_[index]);
-      mutex_ = &(mutexes_[index]);
-    }
+      for (size_t dd = 0; dd < domainDim; ++dd)
+        entropy_flux->row_jacobian(dd, M, H, ret[dd], dd > 0);
+    } // void jacobian(...)
 
-    virtual int order(const XT::Common::Parameter&) const override final
+#  if HAVE_QHULL
+    static void calculate_plane_coefficients(const MomentBasis& basis_functions)
     {
-      return 1.;
+      if (!basis_functions.plane_coefficients()[0].size())
+        basis_functions.calculate_plane_coefficients();
     }
 
-    std::unique_ptr<AlphaReturnType> get_alpha(const StateType& u, const bool regularize) const
+    static bool is_realizable(const BlockVectorType& u, const MomentBasis& basis_functions)
     {
-      // find starting point. Candidates: alpha_iso and the entries in the two caches
-      std::lock_guard<std::mutex> DUNE_UNUSED(guard)(*mutex_);
-      const auto& basis_functions = implementation_.basis_functions();
-      static const StateType u_iso = basis_functions.u_iso();
-      static const StateType alpha_iso = basis_functions.alpha_iso();
-      static const StateType alpha_iso_prime = basis_functions.alpha_iso_prime();
-      const auto density = basis_functions.density(u);
-      const auto u_iso_scaled = u_iso * density;
-      // calculate (inf-norm) distance to isotropic moment with same density
-      RangeFieldType distance = (u - u_iso_scaled).infinity_norm();
-      StateType alpha_start = alpha_iso + alpha_iso_prime * std::log(density);
-      if (!XT::Common::FloatCmp::eq(distance, 0.)) {
-        // calculate distance to closest moment in entity_cache
-        const auto entity_cache_dist_and_it = entity_cache_->find_closest(u);
-        const auto& entity_cache_dist = entity_cache_dist_and_it.first;
-        if (entity_cache_dist < distance) {
-          distance = entity_cache_dist;
-          alpha_start = entity_cache_dist_and_it.second->second;
-        }
-        if (!XT::Common::FloatCmp::eq(distance, 0.)) {
-          // calculate distance to closest moment in thread_cache
-          const auto thread_cache_dist_and_it = thread_cache_.find_closest(u);
-          const auto& thread_cache_dist = thread_cache_dist_and_it.first;
-          if (thread_cache_dist < distance) {
-            distance = thread_cache_dist;
-            alpha_start = thread_cache_dist_and_it.second->second;
-          }
-        }
-      }
-      // If alpha_start is already the solution, we are finished. Else start optimization.
-      if (XT::Common::FloatCmp::eq(distance, 0.)) {
-        return std::make_unique<AlphaReturnType>(std::make_pair(alpha_start, std::make_pair(u, 0.)));
-      } else {
-        auto ret = implementation_.get_alpha(u, alpha_start, regularize);
-        entity_cache_->insert(ret->second.first, ret->first);
-        thread_cache_.insert(ret->second.first, ret->first);
-        return std::move(ret);
-      }
+      for (size_t jj = 0; jj < num_blocks; ++jj)
+        for (const auto& coeff : basis_functions.plane_coefficients()[jj])
+          if (!(u.block(jj) * coeff.first < coeff.second))
+            return false;
+      return true;
     }
-
-    virtual RangeReturnType evaluate(const DomainType& /*point_in_reference_element*/,
-                                     const StateType& u,
-                                     const XT::Common::Parameter& /*param*/ = {}) const override final
+#  else
+    static void calculate_plane_coefficients(const MomentBasis& /*basis_functions*/)
     {
-      const auto alpha = get_alpha(u, true)->first;
-      return implementation_.evaluate_with_alpha(alpha);
+      DUNE_THROW(Dune::NotImplemented, "You are missing Qhull!");
     }
 
-    virtual JacobianRangeReturnType jacobian(const DomainType& /*point_in_reference_element*/,
-                                             const StateType& u,
-                                             const XT::Common::Parameter& /*param*/ = {}) const override final
+    static bool is_realizable(const BlockVectorType& /*u*/, const MomentBasis& /*basis_functions*/)
     {
-      const auto alpha = get_alpha(u, true)->first;
-      return implementation_.jacobian_with_alpha(alpha);
+      DUNE_THROW(Dune::NotImplemented, "You are missing Qhull!");
+      return false;
     }
+#  endif
+  }; // class helper<...>
 
-  private:
-    const IndexSetType& index_set_;
-    mutable LocalCacheType thread_cache_;
-    std::vector<LocalCacheType>& entity_caches_;
-    std::vector<std::mutex>& mutexes_;
-    const ImplementationType& implementation_;
-    LocalCacheType* entity_cache_;
-    std::mutex* mutex_;
-  }; // class Localfunction
-
-  static std::string static_id()
+  template <class anything>
+  struct helper<1, anything>
   {
-    return "dune.gdt.entropybasedflux";
-  }
+    static void jacobian(const BasisValuesMatrixType& M,
+                         BlockMatrixType& H,
+                         DerivativeRangeReturnType& ret,
+                         const ThisType* entropy_flux)
+    {
+      entropy_flux->row_jacobian(0, M, H, ret, false);
+    } // void jacobian(...)
 
-  virtual bool x_dependent() const override final
-  {
-    return false;
-  }
+    static void calculate_plane_coefficients(const MomentBasis& /*basis_functions*/) {}
 
-  virtual std::unique_ptr<LocalFunctionType> local_function() const override final
-  {
-    return std::make_unique<Localfunction>(index_set_, entity_caches_, mutexes_, implementation_);
-  }
+    static bool is_realizable(const BlockVectorType& u, const MomentBasis& basis_functions)
+    {
+      for (size_t jj = 0; jj < num_blocks; ++jj) {
+        const auto& u0 = u.block(jj)[0];
+        const auto& u1 = u.block(jj)[1];
+        const auto& v0 = basis_functions.triangulation()[jj];
+        const auto& v1 = basis_functions.triangulation()[jj + 1];
+        bool ret = (u0 >= 0) && (u1 <= v1 * u0) && (v0 * u0 <= u1);
+        if (!ret)
+          return false;
+      } // jj
+      return true;
+    }
+  }; // class helper<1, ...>
 
-  virtual std::unique_ptr<Localfunction> derived_local_function() const
+  void row_jacobian(const size_t row,
+                    const BasisValuesMatrixType& M,
+                    BlockMatrixType& H,
+                    RowDerivativeRangeReturnType& ret,
+                    bool L_calculated = false) const
   {
-    return std::make_unique<Localfunction>(index_set_, entity_caches_, mutexes_, implementation_);
-  }
+    assert(row < basis_dimDomain);
+    calculate_J(M, ret, row);
+    calculate_A_Binv(ret, H, L_calculated);
+  } // void row_jacobian(...)
 
-  StateType evaluate_kinetic_flux(const E& inside_entity,
-                                  const E& outside_entity,
-                                  const StateType& u_i,
-                                  const StateType& u_j,
-                                  const DomainType& n_ij,
-                                  const size_t dd) const
-  {
-    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
-    const auto local_func = derived_local_function();
-    local_func->bind(inside_entity);
-    const auto alpha_i = local_func->get_alpha(u_i, true)->first;
-    local_func->bind(outside_entity);
-    const auto alpha_j = local_func->get_alpha(u_j, true)->first;
-    return implementation_.evaluate_kinetic_flux_with_alphas(alpha_i, alpha_j, n_ij, dd);
-  } // StateType evaluate_kinetic_flux(...)
+  // calculates A = A B^{-1}. B must be symmetric positive definite (or already Cholesky-factorized if L_calculated).
+  static void calculate_A_Binv(FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>& A,
+                               BlockMatrixType& B,
+                               bool L_calculated = false)
+  {
+    // if B = LL^T, then we have to calculate ret = A (L^T)^{-1} L^{-1} = C L^{-1}
+    // calculate B = LL^T first
+    if (!L_calculated) {
+      for (size_t jj = 0; jj < num_blocks; ++jj)
+        XT::LA::cholesky(B.block(jj));
+    }
+    FieldVector<RangeFieldType, block_size> tmp_vec;
+    FieldVector<RangeFieldType, block_size> tmp_A_row;
+    for (size_t jj = 0; jj < num_blocks; ++jj) {
+      // calculate C = A (L^T)^{-1}
+      const auto offset = block_size * jj;
+      for (size_t ii = 0; ii < block_size; ++ii) {
+        for (size_t kk = 0; kk < block_size; ++kk)
+          tmp_A_row[kk] = A[offset + ii][offset + kk];
+        XT::LA::solve_lower_triangular(B.block(jj), tmp_vec, tmp_A_row);
+        // calculate ret = C L^{-1}
+        XT::LA::solve_lower_triangular_transposed(B.block(jj), tmp_A_row, tmp_vec);
+        for (size_t kk = 0; kk < block_size; ++kk)
+          A[offset + ii][offset + kk] = tmp_A_row[kk];
+      } // ii
+    } // jj
+  } // void calculate_A_Binv(...)
 
-  virtual std::string name() const override final
+  void calculate_hessian(const BlockVectorType& alpha, const BasisValuesMatrixType& M, BlockMatrixType& H) const
   {
-    return implementation_.name();
-  }
+    auto& work_vec = working_storage();
+    calculate_scalar_products(alpha, M, work_vec);
+    apply_exponential(work_vec);
+    // matrix is symmetric, we only use lower triangular part
+    for (size_t jj = 0; jj < num_blocks; ++jj) {
+      std::fill(H.block(jj).begin(), H.block(jj).end(), 0.);
+      const size_t num_quad_points = quad_weights_[jj].size();
+      for (size_t ll = 0; ll < num_quad_points; ++ll) {
+        auto factor_ll = work_vec[jj][ll] * quad_weights_[jj][ll];
+        const auto* basis_ll = M[jj].get_ptr(ll);
+        for (size_t ii = 0; ii < block_size; ++ii) {
+          auto* H_row = &(H.block(jj)[ii][0]);
+          const auto factor_ll_ii = basis_ll[ii] * factor_ll;
+          for (size_t kk = 0; kk <= ii; ++kk)
+            H_row[kk] += basis_ll[kk] * factor_ll_ii;
+        } // ii
+      } // ll
+    } // jj
+  } // void calculate_hessian(...)
+
+  // J = df/dalpha is the derivative of the flux f with respect to alpha.
+  // As f = (f_1, ..., f_d) is matrix-valued
+  // (div f = \sum_{i=1}^d \partial_{x_i} f_i = \sum_{i=1}^d \partial_{x_i} < v_i m \hat{\psi}(alpha) > is
+  // vector-valued),
+  // the derivative is the vector of matrices (df_1/dalpha, ..., df_d/dalpha).
+  // This function computes the dd-th matrix df_dd/dalpha of J and stores it in J_dd.
+  // Assumes working_storage() already contains the needed exp(alpha * m) values.
+  void calculate_J(const BasisValuesMatrixType& M,
+                   Dune::FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>& J_dd,
+                   const size_t dd) const
+  {
+    assert(dd < basis_dimDomain);
+    std::fill(J_dd.begin(), J_dd.end(), 0.);
+    const auto& work_vec = working_storage();
+    // matrix is symmetric, we only use lower triangular part
+    for (size_t jj = 0; jj < num_blocks; ++jj) {
+      const auto offset = jj * block_size;
+      const size_t num_quad_points = quad_weights_[jj].size();
+      for (size_t ll = 0; ll < num_quad_points; ++ll) {
+        auto factor_ll = work_vec[jj][ll] * quad_weights_[jj][ll] * quad_points_[jj][ll][dd];
+        const auto* basis_ll = M[jj].get_ptr(ll);
+        for (size_t ii = 0; ii < block_size; ++ii) {
+          auto* J_row = &(J_dd[offset + ii][0]);
+          const auto factor_ll_ii = basis_ll[ii] * factor_ll;
+          for (size_t kk = 0; kk <= ii; ++kk)
+            J_row[offset + kk] += basis_ll[kk] * factor_ll_ii;
+        } // ii
+      } // ll
+    } // jj
+    // symmetric update for upper triangular part of J
+    for (size_t jj = 0; jj < num_blocks; ++jj) {
+      const auto offset = block_size * jj;
+      for (size_t mm = 0; mm < block_size; ++mm)
+        for (size_t nn = mm + 1; nn < block_size; ++nn)
+          J_dd[offset + mm][offset + nn] = J_dd[offset + nn][offset + mm];
+    }
+  } // void calculate_J(...)
 
-  const BasisfunctionType& basis_functions() const
+  void change_basis(const BlockVectorType& beta_in,
+                    BlockVectorType& v_k,
+                    BasisValuesMatrixType& P_k,
+                    BlockMatrixType& T_k,
+                    BlockVectorType& g_k,
+                    BlockVectorType& beta_out,
+                    BlockMatrixType& H) const
   {
-    return implementation_.basis_functions();
-  }
+    calculate_hessian(beta_in, P_k, H);
+    FieldVector<RangeFieldType, block_size> tmp_vec;
+    for (size_t jj = 0; jj < num_blocks; ++jj)
+      XT::LA::cholesky(H.block(jj));
+    const auto& L = H;
+    T_k.rightmultiply(L);
+    L.mtv(beta_in, beta_out);
+    for (size_t jj = 0; jj < num_blocks; ++jj) {
+      XT::LA::solve_lower_triangular(L.block(jj), tmp_vec, v_k.block(jj));
+      v_k.block(jj) = tmp_vec;
+    } // jj
+    apply_inverse_matrix(L, P_k);
+    calculate_vector_integral(beta_out, P_k, P_k, g_k);
+    g_k -= v_k;
+  } // void change_basis(...)
 
-private:
-  const IndexSetType& index_set_;
-  mutable std::vector<LocalCacheType> entity_caches_;
-  mutable std::vector<std::mutex> mutexes_;
-  ImplementationType implementation_;
+  const MomentBasis& basis_functions_;
+  QuadraturePointsType quad_points_;
+  QuadratureWeightsType quad_weights_;
+  BasisValuesMatrixType M_;
+  const RangeFieldType tau_;
+  const RangeFieldType epsilon_gamma_;
+  const RangeFieldType chi_;
+  const RangeFieldType xi_;
+  const std::vector<RangeFieldType> r_sequence_;
+  const size_t k_0_;
+  const size_t k_max_;
+  const RangeFieldType epsilon_;
+  LocalMatrixType T_minus_one_;
 };
+#endif
 
-template <class GridViewImp, class BasisfunctionImp>
-const size_t EntropyBasedFluxFunction<GridViewImp, BasisfunctionImp>::cache_size;
 
-#if 0
-#  if 0
-/** Analytical flux \mathbf{f}(\mathbf{u}) = < \mu \mathbf{m} G_{\hat{\alpha}(\mathbf{u})} >,
- * Simple backtracking Newton without change of basis
+#if 1
+/**
+ * Specialization of EntropyBasedFluxImplementation for the three-dimensional HatFunctionMomentBasis
  */
-template <class BasisfunctionImp, class GridLayerImp, class U>
-class EntropyBasedLocalFlux
-  : public XT::Functions::LocalizableFluxFunctionInterface<typename GridLayerImp::template Codim<0>::Entity,
-                                                           typename BasisfunctionImp::DomainFieldType,
-                                                           BasisfunctionImp::dimFlux,
-                                                           U,
-                                                           0,
-                                                           typename BasisfunctionImp::RangeFieldType,
-                                                           BasisfunctionImp::dimRange,
-                                                           BasisfunctionImp::dimFlux>
+template <class D, class R, size_t dimRange_or_refinements>
+class EntropyBasedFluxImplementation<HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>>
+  : public XT::Functions::FunctionInterface<HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>::dimRange,
+                                            3,
+                                            HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>::dimRange,
+                                            R>
 {
-  using BaseType =
-      typename XT::Functions::LocalizableFluxFunctionInterface<typename GridLayerImp::template Codim<0>::Entity,
-                                                               typename BasisfunctionImp::DomainFieldType,
-                                                               BasisfunctionImp::dimFlux,
-                                                               U,
-                                                               0,
-                                                               typename BasisfunctionImp::RangeFieldType,
-                                                               BasisfunctionImp::dimRange,
-                                                               BasisfunctionImp::dimFlux>;
-  using ThisType = EntropyBasedLocalFlux;
-
 public:
-  using BasisfunctionType = BasisfunctionImp;
-  using GridLayerType = GridLayerImp;
-  using BaseType::dimDomain;
-  using BaseType::dimRange;
-  using BaseType::dimRangeCols;
+  using MomentBasis = HatFunctionMomentBasis<D, 3, R, dimRange_or_refinements, 1>;
+  using BaseType = typename XT::Functions::
+      FunctionInterface<MomentBasis::dimRange, MomentBasis::dimDomain, MomentBasis::dimRange, R>;
+  using ThisType = EntropyBasedFluxImplementation;
+  using BaseType::d;
+  using BaseType::r;
+  static const size_t basis_dimDomain = MomentBasis::dimDomain;
+  static const size_t basis_dimRange = MomentBasis::dimRange;
+  using typename BaseType::DerivativeRangeReturnType;
   using typename BaseType::DomainFieldType;
   using typename BaseType::DomainType;
-  using typename BaseType::EntityType;
-  using typename BaseType::LocalfunctionType;
-  using typename BaseType::PartialURangeType;
   using typename BaseType::RangeFieldType;
-  using typename BaseType::RangeType;
-  using typename BaseType::StateRangeType;
-  using typename BaseType::StateType;
-  // make matrices a little larger to align to 64 byte boundary
-  static constexpr size_t matrix_num_cols = dimRange % 8 ? dimRange : dimRange + (8 - dimRange % 8);
-  using MatrixType = XT::Common::FieldMatrix<RangeFieldType, dimRange, dimRange>;
-  using VectorType = XT::Common::FieldVector<RangeFieldType, dimRange>;
+  using typename BaseType::RangeReturnType;
+  using typename BaseType::RowDerivativeRangeReturnType;
+  using BasisDomainType = typename MomentBasis::DomainType;
+  using MatrixType = XT::Common::FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>;
   using DynamicRangeType = DynamicVector<RangeFieldType>;
-  using BasisValuesMatrixType = XT::LA::CommonDenseMatrix<RangeFieldType>;
-  using QuadratureRuleType = Dune::QuadratureRule<DomainFieldType, dimDomain>;
-  using AlphaReturnType = std::pair<VectorType, RangeFieldType>;
-  using LocalCacheType = EntropyLocalCache<StateRangeType, VectorType>;
-  using AlphaStorageType = std::map<DomainType, StateRangeType, XT::Common::VectorFloatLess>;
-  static const size_t cache_size = 4 * dimDomain + 2;
-
-  // get permutation instead of sorting directly to be able to sort two vectors the same way
-  // see
-  // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of
-  template <typename T, typename Compare>
-  std::vector<std::size_t> get_sort_permutation(const std::vector<T>& vec, const Compare& compare)
-  {
-    std::vector<std::size_t> p(vec.size());
-    std::iota(p.begin(), p.end(), 0);
-    std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { return compare(vec[i], vec[j]); });
-    return p;
-  }
-
-  template <typename T>
-  void apply_permutation_in_place(std::vector<T>& vec, const std::vector<std::size_t>& p)
-  {
-    std::vector<bool> done(vec.size());
-    for (std::size_t i = 0; i < vec.size(); ++i) {
-      if (done[i]) {
-        continue;
-      }
-      done[i] = true;
-      std::size_t prev_j = i;
-      std::size_t j = p[i];
-      while (i != j) {
-        std::swap(vec[prev_j], vec[j]);
-        done[j] = true;
-        prev_j = j;
-        j = p[j];
-      }
-    }
-  }
-
-  // Joins duplicate quadpoints, vectors have to be sorted!
-  void join_duplicate_quadpoints(std::vector<DomainType>& quad_points, std::vector<RangeFieldType>& quad_weights)
-  {
-    // Index of first quad_point of several quad_points with the same position
-    size_t curr_index = 0;
-    std::vector<size_t> indices_to_remove;
-    for (size_t ll = 1; ll < quad_weights.size(); ++ll) {
-      if (XT::Common::FloatCmp::eq(quad_points[curr_index], quad_points[ll])) {
-        quad_weights[curr_index] += quad_weights[ll];
-        indices_to_remove.push_back(ll);
-      } else {
-        curr_index = ll;
-      }
-    } // ll
-    assert(indices_to_remove.size() < std::numeric_limits<int>::max());
-    // remove duplicate points, from back to front to avoid invalidating indices
-    for (int ll = static_cast<int>(indices_to_remove.size()) - 1; ll >= 0; --ll) {
-      quad_points.erase(quad_points.begin() + indices_to_remove[ll]);
-      quad_weights.erase(quad_weights.begin() + indices_to_remove[ll]);
-    }
-  }
+  using LocalVectorType = XT::Common::FieldVector<RangeFieldType, 3>;
+  using LocalMatrixType = XT::Common::FieldMatrix<RangeFieldType, 3, 3>;
+  using BasisValuesMatrixType = std::vector<std::vector<LocalVectorType>>;
+  using QuadraturePointsType = std::vector<std::vector<BasisDomainType>>;
+  using QuadratureWeightsType = std::vector<std::vector<RangeFieldType>>;
+#  if HAVE_EIGEN
+  using SparseMatrixType = typename XT::LA::Container<RangeFieldType, XT::LA::Backends::eigen_sparse>::MatrixType;
+  using VectorType = typename XT::LA::Container<RangeFieldType, XT::LA::Backends::eigen_sparse>::VectorType;
+#  else
+  using SparseMatrixType = typename XT::LA::Container<RangeFieldType, XT::LA::default_sparse_backend>::MatrixType;
+  using VectorType = typename XT::LA::Container<RangeFieldType, XT::LA::default_sparse_backend>::VectorType;
+#  endif
+  using AlphaReturnType = std::pair<VectorType, std::pair<DomainType, RangeFieldType>>;
 
-  explicit EntropyBasedLocalFlux(
-      const BasisfunctionType& basis_functions,
-      const GridLayerType& grid_layer,
-      const RangeFieldType tau = 1e-9,
-      const RangeFieldType epsilon_gamma = 0.01,
-      const RangeFieldType chi = 0.5,
-      const RangeFieldType xi = 1e-3,
-      const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
-      const size_t k_0 = 500,
-      const size_t k_max = 1000,
-      const RangeFieldType epsilon = std::pow(2, -52),
-      const std::string name = static_id())
-    : index_set_(grid_layer.indexSet())
-    , basis_functions_(basis_functions)
-    , quad_points_(basis_functions_.quadratures().merged().size())
-    , quad_weights_(quad_points_.size())
-    , M_(quad_points_.size(), matrix_num_cols, 0., 0)
+  explicit EntropyBasedFluxImplementation(const MomentBasis& basis_functions,
+                                          const RangeFieldType tau,
+                                          const RangeFieldType epsilon_gamma,
+                                          const RangeFieldType chi,
+                                          const RangeFieldType xi,
+                                          const std::vector<RangeFieldType> r_sequence,
+                                          const size_t k_0,
+                                          const size_t k_max,
+                                          const RangeFieldType epsilon)
+    : basis_functions_(basis_functions)
+    , quad_points_(basis_functions_.triangulation().faces().size())
+    , quad_weights_(basis_functions_.triangulation().faces().size())
+    , M_(basis_functions_.triangulation().faces().size())
     , tau_(tau)
     , epsilon_gamma_(epsilon_gamma)
     , chi_(chi)
@@ -2478,769 +1830,1191 @@ public:
     , k_0_(k_0)
     , k_max_(k_max)
     , epsilon_(epsilon)
-    , name_(name)
-    , cache_(index_set_.size(0), LocalCacheType(cache_size))
-    , alpha_storage_(index_set_.size(0))
-    , mutexes_(index_set_.size(0))
   {
-    size_t ll = 0;
-    for (const auto& quad_point : basis_functions_.quadratures().merged()) {
-      quad_points_[ll] = quad_point.position();
-      quad_weights_[ll] = quad_point.weight();
-      ++ll;
+    const auto& triangulation = basis_functions_.triangulation();
+    const auto& vertices = triangulation.vertices();
+    const auto& faces = triangulation.faces();
+    assert(vertices.size() == basis_dimRange);
+    // create pattern
+    XT::LA::SparsityPatternDefault pattern(basis_dimRange);
+    for (size_t vertex_index = 0; vertex_index < basis_dimRange; ++vertex_index) {
+      const auto& vertex = vertices[vertex_index];
+      const auto& adjacent_faces = triangulation.get_face_indices(vertex->position());
+      for (const auto& face_index : adjacent_faces) {
+        const auto& face = faces[face_index];
+        assert(face->vertices().size() == 3);
+        for (size_t jj = 0; jj < 3; ++jj)
+          pattern.insert(vertex_index, face->vertices()[jj]->index());
+      }
     }
-    // Join duplicate quad_points. For that purpose, first sort the vectors
-    const auto permutation = get_sort_permutation(quad_points_, XT::Common::VectorFloatLess{});
-    apply_permutation_in_place(quad_points_, permutation);
-    apply_permutation_in_place(quad_weights_, permutation);
-    // Now join duplicate quad_points by removing all quad_points with the same position except one and adding the
-    // weights of the removed points to the remaining point
-    join_duplicate_quadpoints(quad_points_, quad_weights_);
-    assert(quad_points_.size() == quad_weights_.size());
-    // evaluate basis functions and store in matrix
-    M_.resize(quad_points_.size(), matrix_num_cols);
-    for (ll = 0; ll < quad_points_.size(); ++ll) {
-      const auto val = basis_functions_.evaluate(quad_points_[ll]);
-      for (size_t ii = 0; ii < dimRange; ++ii)
-        M_.set_entry(ll, ii, val[ii]);
-    }
-  }
+    pattern.sort();
+    pattern_ = pattern;
+    // store basis evaluations
+    const auto& quadratures = basis_functions_.quadratures();
+    assert(quadratures.size() == faces.size());
+    for (size_t jj = 0; jj < faces.size(); ++jj) {
+      for (const auto& quad_point : quadratures[jj]) {
+        quad_points_[jj].emplace_back(quad_point.position());
+        quad_weights_[jj].emplace_back(quad_point.weight());
+      }
+    } // jj
+    for (size_t jj = 0; jj < faces.size(); ++jj) {
+      M_[jj] = std::vector<LocalVectorType>(quad_points_[jj].size());
+      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll)
+        M_[jj][ll] = basis_functions_.evaluate_on_face(quad_points_[jj][ll], jj);
+    } // jj
+  } // constructor
 
-  class Localfunction : public LocalfunctionType
+  virtual int order(const XT::Common::Parameter& /*param*/ = {}) const override
   {
-  public:
-    using LocalfunctionType::dimDomain;
-    using LocalfunctionType::dimRange;
-    using typename LocalfunctionType::ColPartialURangeType;
-    using typename LocalfunctionType::ColRangeType;
-
-    Localfunction(const EntityType& e,
-                  const BasisfunctionType& basis_functions,
-                  const std::vector<DomainType>& quad_points,
-                  const std::vector<RangeFieldType>& quad_weights,
-                  const BasisValuesMatrixType& M,
-                  const RangeFieldType tau,
-                  const RangeFieldType epsilon_gamma,
-                  const RangeFieldType chi,
-                  const RangeFieldType xi,
-                  const std::vector<RangeFieldType>& r_sequence,
-                  const size_t k_0,
-                  const size_t k_max,
-                  const RangeFieldType epsilon,
-                  LocalCacheType& cache,
-                  AlphaStorageType& alpha_storage,
-                  std::mutex& mutex
-#    if HAVE_CLP
-                  ,
-                  XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>>& lp)
-#    else
-                  )
-#    endif
-      : LocalfunctionType(e)
-      , basis_functions_(basis_functions)
-      , quad_points_(quad_points)
-      , quad_weights_(quad_weights)
-      , M_(M)
-      , tau_(tau)
-      , epsilon_gamma_(epsilon_gamma)
-      , chi_(chi)
-      , xi_(xi)
-      , r_sequence_(r_sequence)
-      , k_0_(k_0)
-      , k_max_(k_max)
-      , epsilon_(epsilon)
-      , cache_(cache)
-      , alpha_storage_(alpha_storage)
-      , mutex_(mutex)
-#    if HAVE_CLP
-      , realizability_helper_(basis_functions_, quad_points_, lp)
-#    else
-      , realizability_helper_(basis_functions_, quad_points_)
-#    endif
-    {}
-
-    template <class BasisFuncImp = BasisfunctionType, bool quadrature_contains_vertices = true, bool anything = true>
-    struct RealizabilityHelper;
-
-#    if HAVE_CLP
-    template <class BasisFuncImp, bool quadrature_contains_vertices, bool anything>
-    struct RealizabilityHelper
-    {
-      static_assert(std::is_same<BasisFuncImp, BasisfunctionType>::value, "BasisFuncImp has to be BasisfunctionType!");
-
-      RealizabilityHelper(const BasisfunctionType& basis_functions,
-                          const std::vector<DomainType>& quad_points,
-                          XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>>& lp)
-        : basis_functions_(basis_functions)
-        , quad_points_(quad_points)
-        , lp_(lp)
-      {}
-
-      // The ClpSimplex structure seems to get corrupted sometimes (maybe some problems with infs/NaNs?), so we
-      // reinitialize it if the stopping conditions is always false
-      void setup_linear_program(const bool reinitialize) const
-      {
-        if (!*lp_ || reinitialize) {
-          // We start with creating a model with dimRange rows and num_quad_points columns */
-          constexpr int num_rows = static_cast<int>(dimRange);
-          assert(quad_points_.size() < std::numeric_limits<int>::max());
-          int num_cols = static_cast<int>(quad_points_.size()); /* variables are x_1, ..., x_{num_quad_points} */
-          *lp_ = std::make_unique<ClpSimplex>(false);
-          auto& lp = **lp_;
-          // set number of rows
-          lp.resize(num_rows, 0);
-
-          // Clp wants the row indices that are non-zero in each column. We have a dense matrix, so provide all indices
-          // 0..num_rows
-          std::array<int, num_rows> row_indices;
-          for (int ii = 0; ii < num_rows; ++ii)
-            row_indices[static_cast<size_t>(ii)] = ii;
-
-          // set columns for quadrature points
-          for (int ii = 0; ii < num_cols; ++ii) {
-            const auto v_i = basis_functions_.evaluate(quad_points_[static_cast<size_t>(ii)]);
-            // First argument: number of elements in column
-            // Second/Third argument: indices/values of column entries
-            // Fourth/Fifth argument: lower/upper column bound, i.e. lower/upper bound for x_i. As all x_i should be
-            // positive, set to 0/inf, which is the default.
-            // Sixth argument: Prefactor in objective for x_i, this is 0 for all x_i, which is also the default;
-            lp.addColumn(num_rows, row_indices.data(), &(v_i[0]));
-          }
-
-          // silence lp
-          lp.setLogLevel(0);
-        } // if (!lp_)
-      }
+    return 1;
+  }
 
-      bool is_realizable(const StateRangeType& u, const bool reinitialize) const
-      {
-        const auto density = basis_functions_.density(u);
-        if (!(density > 0.) || std::isinf(density))
-          return false;
-        const auto u_prime = u / density;
-        setup_linear_program(reinitialize);
-        auto& lp = **lp_;
-        constexpr int num_rows = static_cast<int>(dimRange);
-        // set rhs (equality constraints, so set both bounds equal
-        for (int ii = 0; ii < num_rows; ++ii) {
-          size_t uii = static_cast<size_t>(ii);
-          lp.setRowLower(ii, u_prime[uii]);
-          lp.setRowUpper(ii, u_prime[uii]);
-        }
-        // set maximal wall time. If this is not set, in rare cases the primal method never returns
-        lp.setMaximumWallSeconds(60);
-        // Now check solvability
-        lp.primal();
-        return lp.primalFeasible();
-      }
+  VectorType get_isotropic_alpha(const DomainType& u) const
+  {
+    // returns alpha_iso + alpha_iso_prime * log(density(u)), copied entry by entry into a VectorType
+    static const auto alpha_iso = basis_functions_.alpha_iso();
+    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
+    const auto alpha_dyn = alpha_iso + alpha_iso_prime * std::log(basis_functions_.density(u));
+    VectorType alpha(alpha_dyn.size());
+    for (size_t idx = 0; idx < alpha.size(); ++idx) alpha[idx] = alpha_dyn[idx];
+    return alpha;
+  }
 
-    private:
-      const BasisfunctionType& basis_functions_;
-      const std::vector<DomainType>& quad_points_;
-      XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>>& lp_;
-    }; // struct RealizabilityHelper<...>
-#    else // HAVE_CLP
-    template <class BasisFuncImp, bool quadrature_contains_vertices, bool anything>
-    struct RealizabilityHelper
-    {
-      RealizabilityHelper(const BasisfunctionType& /*basis_functions*/, const std::vector<DomainType>& /*quad_points*/)
-      {
-        DUNE_THROW(Dune::NotImplemented, "You are missing Clp!");
-      }
+  virtual RangeReturnType evaluate(const DomainType& u,
+                                   const XT::Common::Parameter& /*param*/ = {}) const override final
+  {
+    // alpha comes from get_alpha, called with the isotropic multiplier as second argument and regularize = true
+    return evaluate_with_alpha(get_alpha(u, get_isotropic_alpha(u), true)->first);
+  }
 
-      bool is_realizable(const StateRangeType& /*u*/, const bool /*reinitialize*/) const
-      {
-        DUNE_THROW(Dune::NotImplemented, "You are missing Clp!");
-        return false;
-      }
-    }; // struct RealizabilityHelper<...>
-#    endif // HAVE_CLP
-
-    // specialization for hatfunctions
-    template <size_t dimRange_or_refinements, bool anything>
-    struct RealizabilityHelper<
-        HatFunctionMomentBasis<DomainFieldType, dimDomain, RangeFieldType, dimRange_or_refinements, 1, dimDomain>,
-        true,
-        anything>
-    {
-      RealizabilityHelper(const BasisfunctionType& /*basis_functions*/,
-                          const std::vector<DomainType>& /*quad_points*/
-#    if HAVE_CLP
-                          ,
-                          XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>>& /*lp*/)
-#    else
-      )
-#    endif
-      {}
+  virtual RangeReturnType evaluate_with_alpha(const VectorType& alpha) const // flux for a given multiplier alpha
+  {
+    RangeReturnType ret(0.);
+    LocalVectorType local_alpha, local_ret;
+    const auto& triangulation = basis_functions_.triangulation();
+    const auto& faces = triangulation.faces();
+    for (size_t dd = 0; dd < basis_dimDomain; ++dd) {
+      // calculate ret[dd] = < omega[dd] m G_\alpha(u) >, assembled face by face with the per-face quadrature
+      for (size_t jj = 0; jj < faces.size(); ++jj) {
+        local_ret *= 0.; // reset the accumulator for this face
+        const auto& face = faces[jj];
+        const auto& vertices = face->vertices();
+        for (size_t ii = 0; ii < 3; ++ii) // gather the entries of alpha that belong to the three vertices of the face
+          local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
+        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+          const auto& basis_ll = M_[jj][ll]; // entry of M_ for quadrature point ll of face jj
+          auto factor_ll = std::exp(local_alpha * basis_ll) * quad_points_[jj][ll][dd] * quad_weights_[jj][ll];
+          for (size_t ii = 0; ii < 3; ++ii)
+            local_ret[ii] += basis_ll[ii] * factor_ll;
+        } // ll (quad points)
+        for (size_t ii = 0; ii < 3; ++ii) // scatter the face contribution to the global vertex indices
+          ret[dd][vertices[ii]->index()] += local_ret[ii];
+      } // jj (faces)
+    } // dd
+    return ret;
+  } // ... evaluate_with_alpha(...)
 
-      static bool is_realizable(const StateRangeType& u, const bool /*reinitialize*/)
-      {
-        for (const auto& u_i : u)
-          if (!(u_i > 0.) || std::isinf(u_i))
-            return false;
-        return true;
-      }
-    }; // struct RealizabilityHelper<Hatfunctions, ...>
+  virtual DerivativeRangeReturnType jacobian(const DomainType& u,
+                                             const XT::Common::Parameter& /*param*/ = {}) const override final
+  {
+    // alpha comes from get_alpha, called with the isotropic multiplier as second argument and regularize = true
+    return jacobian_with_alpha(get_alpha(u, get_isotropic_alpha(u), true)->first);
+  }
 
-    void keep(const StateRangeType& u)
-    {
-      cache_.keep(u);
+  virtual DerivativeRangeReturnType jacobian_with_alpha(const VectorType& alpha) const // ret[dd] = J_dd H^{-1}
+  {
+    DerivativeRangeReturnType ret;
+    thread_local SparseMatrixType H(basis_dimRange, basis_dimRange, pattern_, 0); // constructed once per thread
+    thread_local SparseMatrixType J(basis_dimRange, basis_dimRange, pattern_, 0); // constructed once per thread
+    calculate_hessian(alpha, M_, H); // also fills the work vecs that calculate_J reads below
+    for (size_t dd = 0; dd < basis_dimDomain; ++dd) {
+      calculate_J(M_, J, dd); // J is overwritten for every direction dd
+      calculate_J_Hinv(J, H, ret[dd]);
     }
+    return ret;
+  } // ... jacobian_with_alpha(...)
 
-    using LocalfunctionType::entity;
-
-    // temporary vectors to store inner products and exponentials
-    std::vector<RangeFieldType>& working_storage() const
-    {
-      thread_local std::vector<RangeFieldType> work_vec;
-      work_vec.resize(quad_points_.size());
-      return work_vec;
-    }
+  DomainType evaluate_kinetic_flux(const DomainType& u_i, // moments handed to get_alpha to obtain alpha_i
+                                   const DomainType& u_j, // moments handed to get_alpha to obtain alpha_j
+                                   const BasisDomainType& n_ij, // forwarded unchanged
+                                   const size_t dd) const // direction index, forwarded unchanged
+  { // solves for both multipliers (regularization enabled) and returns the kinetic flux computed from them
+    const auto alpha_i = get_alpha(u_i, get_isotropic_alpha(u_i), true)->first;
+    const auto alpha_j = get_alpha(u_j, get_isotropic_alpha(u_j), true)->first;
+    return evaluate_kinetic_flux_with_alphas(alpha_i, alpha_j, n_ij, dd); // the missing return was undefined behaviour
+  } // DomainType evaluate_kinetic_flux(...)
 
-    void calculate_scalar_products(const VectorType& beta_in,
-                                   const BasisValuesMatrixType& M,
-                                   std::vector<RangeFieldType>& scalar_products) const
-    {
-#    if HAVE_MKL || HAVE_CBLAS
-      XT::Common::Blas::dgemv(XT::Common::Blas::row_major(),
-                              XT::Common::Blas::no_trans(),
-                              static_cast<int>(quad_points_.size()),
-                              dimRange,
-                              1.,
-                              M.data(),
-                              matrix_num_cols,
-                              &(beta_in[0]),
-                              1,
-                              0.,
-                              scalar_products.data(),
-                              1);
-#    else
-      const size_t num_quad_points = quad_points_.size();
-      std::fill(scalar_products.begin(), scalar_products.end(), 0.);
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        const auto* basis_ll = M.get_ptr(ll);
-        scalar_products[ll] = std::inner_product(beta_in.begin(), beta_in.end(), basis_ll, 0.);
+  DomainType evaluate_kinetic_flux_with_alphas(const VectorType& alpha_i,
+                                               const VectorType& alpha_j,
+                                               const BasisDomainType& n_ij,
+                                               const size_t dd) const
+  {
+    // calculate the dd-th term < \omega_dd m G_\alpha(u) > n_dd of \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
+    DomainType ret(0);
+    const auto& triangulation = basis_functions_.triangulation();
+    const auto& faces = triangulation.faces();
+    LocalVectorType local_alpha_i, local_alpha_j, local_ret;
+    for (size_t jj = 0; jj < faces.size(); ++jj) {
+      local_ret *= 0.; // reset the accumulator for this face
+      const auto& face = faces[jj];
+      const auto& vertices = face->vertices();
+      for (size_t ii = 0; ii < 3; ++ii) { // gather the entries of both multipliers for the vertices of the face
+        local_alpha_i[ii] = alpha_i.get_entry(vertices[ii]->index());
+        local_alpha_j[ii] = alpha_j.get_entry(vertices[ii]->index());
       }
-#    endif
-    }
-
-    void apply_exponential(std::vector<RangeFieldType>& values) const
-    {
-      assert(values.size() < std::numeric_limits<int>::max());
-      XT::Common::Mkl::exp(static_cast<int>(values.size()), values.data(), values.data());
-    }
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto& basis_ll = M_[jj][ll];
+        const auto position = quad_points_[jj][ll][dd]; // dd-th coordinate of the quadrature point
+        RangeFieldType factor = // alpha_i is used where position * n_ij[dd] > 0, alpha_j everywhere else
+            position * n_ij[dd] > 0. ? std::exp(local_alpha_i * basis_ll) : std::exp(local_alpha_j * basis_ll);
+        factor *= quad_weights_[jj][ll] * position;
+        for (size_t ii = 0; ii < 3; ++ii)
+          local_ret[ii] += basis_ll[ii] * factor;
+      } // ll (quad points)
+      for (size_t ii = 0; ii < 3; ++ii) // scatter the face contribution to the global vertex indices
+        ret[vertices[ii]->index()] += local_ret[ii];
+    } // jj (faces)
+    ret *= n_ij[dd]; // the factor n_dd is applied once to the assembled integral
+    return ret;
+  } // DomainType evaluate_kinetic_flux_with_alphas(...)
 
-    // calculate ret = \int (exp(beta_in * m))
-    RangeFieldType calculate_scalar_integral(const VectorType& beta_in, const BasisValuesMatrixType& M) const
-    {
-      auto& work_vec = working_storage();
-      calculate_scalar_products(beta_in, M, work_vec);
-      apply_exponential(work_vec);
-      return std::inner_product(quad_weights_.begin(), quad_weights_.end(), work_vec.begin(), RangeFieldType(0.));
-    }
+  const MomentBasis& basis_functions() const // read-only access to the basis this object works with
+  {
+    return basis_functions_;
+  }
 
-    // calculate ret = \int (m1 exp(beta_in * m2))
-    void calculate_vector_integral(const VectorType& beta_in,
-                                   const BasisValuesMatrixType& M1,
-                                   const BasisValuesMatrixType& M2,
-                                   VectorType& ret,
-                                   bool same_beta = false,
-                                   bool only_first_component = false) const
-    {
-      auto& work_vec = working_storage();
-      if (!same_beta) {
-        calculate_scalar_products(beta_in, M2, work_vec);
-        apply_exponential(work_vec);
-      }
-      std::fill(ret.begin(), ret.end(), 0.);
-      const size_t num_quad_points = quad_weights_.size();
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        const auto factor_ll = work_vec[ll] * quad_weights_[ll];
-        const auto* basis_ll = M1.get_ptr(ll);
-        for (size_t ii = 0; ii < (only_first_component ? 1 : dimRange); ++ii)
-          ret[ii] += basis_ll[ii] * factor_ll;
-      } // ll
-    }
+  std::unique_ptr<AlphaReturnType>
+  get_alpha(const DomainType& u, const VectorType& alpha_in, const bool regularize) const
+  { // Newton iteration with backtracking line search; alpha_in is the initial guess, u the moment vector
+    auto ret = std::make_unique<AlphaReturnType>(); // first: alpha, second: (v * density, r) of the accepted iterate
 
-    void store_alpha(const DomainType& x_local, const StateRangeType& alpha)
-    {
-      alpha_storage_[x_local] = alpha;
-    }
+    // rescale u such that the density <psi> is 1
+    RangeFieldType density = basis_functions_.density(u);
+    if (!(density > 0.) || std::isinf(density))
+      DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
 
-    StateRangeType get_stored_alpha(const DomainType& x_local) const
-    {
-      return alpha_storage_.at(x_local);
-    }
+    VectorType u_prime(basis_dimRange, 0., 0);
+    for (size_t ii = 0; ii < basis_dimRange; ++ii)
+      u_prime.set_entry(ii, u[ii] / density);
+    VectorType alpha_iso_prime(basis_dimRange, 0., 0);
+    basis_functions_.alpha_iso_prime(alpha_iso_prime);
 
-    template <class GridLayerType>
-    void center_results_to_intersections(const GridLayerType& grid_layer)
-    {
-      const auto center = entity().geometry().local(entity().geometry().center());
-      const auto center_alpha = get_stored_alpha(center);
-      for (const auto& intersection : Dune::intersections(grid_layer, entity()))
-        store_alpha(entity().geometry().local(intersection.geometry().center()), center_alpha);
-    }
+    // tau_prime is the bound on the gradient norm used in the stopping criterion below (never larger than tau_)
+    RangeFieldType tau_prime = std::min(
+        tau_ / ((1 + std::sqrt(basis_dimRange) * u_prime.l2_norm()) * density + std::sqrt(basis_dimRange) * tau_),
+        tau_);
+    thread_local SparseMatrixType H(basis_dimRange, basis_dimRange, pattern_, 0);
+    thread_local auto solver = XT::LA::make_solver(H);
 
-    std::unique_ptr<AlphaReturnType> get_alpha(const DomainType& x_local,
-                                               const StateRangeType& u,
-                                               const XT::Common::Parameter& param,
-                                               const bool regularize) const
-    {
-      const bool boundary = bool(param.get("boundary")[0]);
-      // get initial multiplier and basis matrix from last time step
-      auto ret = std::make_unique<AlphaReturnType>();
-      mutex_.lock();
-      if (boundary)
-        cache_.set_capacity(cache_size + dimDomain);
-
-      // rescale u such that the density <psi> is 1
-      RangeFieldType density = basis_functions_.density(u);
-      if (!(density > 0.) || std::isinf(density)) {
-        mutex_.unlock();
-        DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
+    // calculate moment vector for isotropic distribution
+    VectorType u_iso(basis_dimRange, 0., 0);
+    basis_functions_.u_iso(u_iso);
+    VectorType alpha_k = alpha_in - alpha_iso_prime * std::log(density);
+    VectorType v(basis_dimRange, 0., 0), g_k(basis_dimRange, 0., 0), d_k(basis_dimRange, 0., 0),
+        tmp_vec(basis_dimRange, 0., 0), alpha_prime(basis_dimRange);
+    const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.}; // only r = 0 if !regularize
+    const auto r_max = r_sequence.back();
+    for (const auto& r : r_sequence) { // iterate the selected sequence (not r_sequence_) so regularize is honored
+      // regularize u
+      v = u_prime;
+      if (r > 0) {
+        alpha_k = get_isotropic_alpha(u);
+        tmp_vec = u_iso;
+        tmp_vec *= r;
+        v *= 1 - r;
+        v += tmp_vec;
       }
-      VectorType u_prime = u / density;
-      auto alpha_iso_dyn = basis_functions_.alpha_iso();
-      VectorType alpha_iso;
-      for (size_t ii = 0; ii < dimRange; ++ii)
-        alpha_iso[ii] = alpha_iso_dyn[ii];
-      VectorType v, u_eps_diff, alpha_k;
-      RangeFieldType first_error_cond, second_error_cond, tau_prime;
-
-      // if value has already been calculated for these values, skip computation
-      const auto cache_iterator = cache_.find_closest(u_prime);
-      if (cache_iterator != cache_.end() && XT::Common::FloatCmp::eq(cache_iterator->first, u_prime, 1e-14, 1e-14)) {
-        const auto alpha_prime = cache_iterator->second;
-        ret->first = alpha_prime + alpha_iso * std::log(density);
-        ret->second = 0.;
-        alpha_storage_[x_local] = ret->first;
-        mutex_.unlock();
-        return ret;
-      } else {
-        auto u_iso = basis_functions_.u_iso();
-        const RangeFieldType dim_factor = is_full_moment_basis<BasisfunctionType>::value ? 1. : std::sqrt(dimDomain);
-        tau_prime = std::min(tau_ / ((1 + dim_factor * u_prime.two_norm()) * density + dim_factor * tau_), tau_);
 
-        // define further variables
-        VectorType g_k, d_k, tmp_vec, alpha_prime;
-        alpha_k = cache_iterator != cache_.end() ? cache_iterator->second : alpha_iso;
+      // calculate f_0
+      RangeFieldType f_k = calculate_f(alpha_k, v);
 
-        const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
-        const auto r_max = r_sequence.back();
-        for (const auto& r : r_sequence) {
-          // regularize u
-          v = u_prime;
-          if (r > 0) {
-            alpha_k = alpha_iso;
-            DynamicRangeType r_times_u_iso = u_iso;
-            r_times_u_iso *= r;
-            v *= 1 - r;
-            v += r_times_u_iso;
+      int pure_newton = 0;
+      for (size_t kk = 0; kk < k_max_; ++kk) {
+        // exit inner for loop to increase r if too many iterations are used
+        if (kk > k_0_ && r < r_max)
+          break;
+        // calculate gradient g
+        calculate_gradient(alpha_k, v, g_k);
+        // calculate Hessian H (reuses the exponentials that calculate_gradient left in the work vecs)
+        calculate_hessian(alpha_k, M_, H, true);
+        // calculate descent direction d_k;
+        tmp_vec = g_k;
+        tmp_vec *= -1;
+        try {
+          solver.apply(tmp_vec, d_k);
+        } catch (const XT::LA::Exceptions::linear_solver_failed& error) {
+          if (r < r_max) {
+            break;
+          } else {
+            DUNE_THROW(XT::LA::Exceptions::linear_solver_failed,
+                       "Failure to converge, solver error was: " << error.what());
           }
-          // calculate T_k u
-          VectorType v_k = v;
-          // calculate f_0
-          RangeFieldType f_k = calculate_scalar_integral(alpha_k, M_);
-          f_k -= alpha_k * v_k;
-
-          thread_local auto H = XT::Common::make_unique<MatrixType>(0.);
+        }
 
-          int pure_newton = 0;
-          for (size_t kk = 0; kk < k_max_; ++kk) {
-            // exit inner for loop to increase r if too many iterations are used
-            if (kk > k_0_ && r < r_max)
+        const auto& alpha_tilde = alpha_k;
+        auto& u_alpha_tilde = tmp_vec;
+        u_alpha_tilde = g_k;
+        u_alpha_tilde += v;
+        auto density_tilde = basis_functions_.density(u_alpha_tilde);
+        if (!(density_tilde > 0.) || std::isinf(density_tilde))
+          break;
+        alpha_prime = alpha_iso_prime;
+        alpha_prime *= -std::log(density_tilde);
+        alpha_prime += alpha_tilde;
+        auto& u_eps_diff = tmp_vec;
+        calculate_u(alpha_prime, u_eps_diff); // store u_alpha_prime in u_eps_diff
+        u_eps_diff *= -(1 - epsilon_gamma_);
+        u_eps_diff += v;
+        // checking realizability is cheap so we do not need the second stopping criterion
+        if (g_k.l2_norm() < tau_prime && is_realizable(u_eps_diff)) {
+          ret->first = alpha_iso_prime;
+          ret->first *= std::log(density);
+          ret->first += alpha_prime;
+          auto v_ret_eig = v * density;
+          DomainType v_ret;
+          for (size_t ii = 0; ii < d; ++ii)
+            v_ret[ii] = v_ret_eig[ii];
+          ret->second = std::make_pair(v_ret, r);
+          return ret;
+        } else {
+          RangeFieldType zeta_k = 1;
+          // backtracking line search
+          auto& alpha_new = tmp_vec;
+          while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.l2_norm() / d_k.l2_norm()) {
+            // calculate alpha_new = alpha_k + zeta_k d_k
+            alpha_new = d_k;
+            alpha_new *= zeta_k;
+            alpha_new += alpha_k;
+            // calculate f(alpha_new)
+            RangeFieldType f_new = calculate_f(alpha_new, v);
+            if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
+              alpha_k = alpha_new;
+              f_k = f_new;
+              pure_newton = 0;
               break;
-            // calculate gradient g
-            calculate_vector_integral(alpha_k, M_, M_, g_k);
-            g_k -= v_k;
-            // calculate Hessian H
-            calculate_hessian(alpha_k, M_, *H, true);
-            // calculate descent direction d_k;
-            d_k = g_k;
-            d_k *= -1;
-            try {
-              // if H = LL^T, then we have to calculate d_k = - L^{-T} L^{-1} g_k
-              // calculate H = LL^T first
-              XT::LA::cholesky(*H);
-              VectorType tmp_vec;
-              // calculate d_tmp = -L^{-1} g_k and store in B
-              XT::LA::solve_lower_triangular(*H, tmp_vec, d_k);
-              // calculate d_k = L^{-T} d_tmp
-              XT::LA::solve_lower_triangular_transposed(*H, d_k, tmp_vec);
-            } catch (const Dune::MathError&) {
-              if (r < r_max)
-                break;
-              mutex_.unlock();
-              const std::string err_msg =
-                  "Failed to converge for " + XT::Common::to_string(u) + " with density "
-                  + XT::Common::to_string(density) + " and multiplier " + XT::Common::to_string(alpha_k)
-                  + " at position " + XT::Common::to_string(entity().geometry().center())
-                  + " due to errors in change_basis! Last u_eps_diff = " + XT::Common::to_string(u_eps_diff)
-                  + ", first_error_cond = " + XT::Common::to_string(first_error_cond) + ", second_error_cond = "
-                  + XT::Common::to_string(second_error_cond) + ", tau_prime = " + XT::Common::to_string(tau_prime);
-              DUNE_THROW(MathError, err_msg);
             }
+            zeta_k = chi_ * zeta_k;
+          } // backtracking linesearch while
+          // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
+          if (zeta_k <= epsilon_ * alpha_k.l2_norm() / d_k.l2_norm())
+            ++pure_newton;
+        } // else (stopping conditions)
+      } // k loop (Newton iterations)
+    } // r loop (Regularization parameter)
+    DUNE_THROW(MathError, "Failed to converge");
+    return ret;
+  } // ... get_alpha(...)
+
+private:
+  static bool is_realizable(const VectorType& u)
+  {
+    for (const auto& entry : u) // every entry has to be strictly positive and finite (NaN is rejected as well)
+      if (std::isinf(entry) || !(entry > 0.))
+        return false;
+    return true;
+  }
+
+  // per-thread scratch storage: one vector per face of the triangulation, one entry per quadrature point of the face
+  std::vector<std::vector<RangeFieldType>>& get_work_vecs() const
+  {
+    thread_local std::vector<std::vector<RangeFieldType>> per_face_storage;
+    const size_t num_faces = basis_functions_.triangulation().faces().size();
+    // resize() keeps existing entries, so values written by an earlier call on this thread stay available
+    per_face_storage.resize(num_faces);
+    for (size_t face_index = 0; face_index < num_faces; ++face_index)
+      per_face_storage[face_index].resize(quad_points_[face_index].size());
+    return per_face_storage;
+  }
+
+private:
+  // calculates ret = J H^{-1}. H is assumed to be symmetric positive definite, which gives ret^T = H^{-T} J^T =
+  // H^{-1} J^T, so we just have to solve H y = x for each row x of J; the solution y becomes the same row of ret
+  void calculate_J_Hinv(SparseMatrixType& J, const SparseMatrixType& H, RowDerivativeRangeReturnType& ret) const
+  {
+    thread_local VectorType solution(basis_dimRange, 0., 0), tmp_rhs(basis_dimRange, 0., 0);
+#  if HAVE_EIGEN
+    typedef ::Eigen::SparseMatrix<RangeFieldType, ::Eigen::ColMajor> ColMajorBackendType;
+    ColMajorBackendType colmajor_copy(H.backend()); // the solver below is instantiated for a column-major matrix
+    colmajor_copy.makeCompressed();
+    typedef ::Eigen::SimplicialLDLT<ColMajorBackendType> SolverType;
+    SolverType solver;
+    solver.analyzePattern(colmajor_copy);
+    solver.factorize(colmajor_copy); // factorize once, the factorization is reused for every row of J
+#  else // HAVE_EIGEN
+    auto solver = XT::LA::make_solver(H);
+#  endif // HAVE_EIGEN
+    for (size_t ii = 0; ii < basis_dimRange; ++ii) {
+      // copy row ii of J to tmp_rhs
+      for (size_t kk = 0; kk < basis_dimRange; ++kk)
+        tmp_rhs.set_entry(kk, J.get_entry(ii, kk));
+        // solve H * solution = tmp_rhs
+#  if HAVE_EIGEN
+      solution.backend() = solver.solve(tmp_rhs.backend());
+#  else // HAVE_EIGEN
+      solver.apply(tmp_rhs, solution);
+#  endif
+      // copy the solution to row ii of ret
+      for (size_t kk = 0; kk < basis_dimRange; ++kk)
+        ret[ii][kk] = solution.get_entry(kk);
+    }
+  } // void calculate_J_Hinv(...)
+
+  RangeFieldType calculate_f(const VectorType& alpha, const VectorType& v) const // f = < exp(alpha * m) > - alpha * v
+  {
+    RangeFieldType ret(0.);
+    XT::Common::FieldVector<RangeFieldType, 3> local_alpha;
+    const auto& triangulation = basis_functions_.triangulation();
+    const auto& faces = triangulation.faces();
+    for (size_t jj = 0; jj < faces.size(); ++jj) {
+      const auto& face = faces[jj];
+      const auto& vertices = face->vertices();
+      for (size_t ii = 0; ii < 3; ++ii) // gather the entries of alpha that belong to the three vertices of the face
+        local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) // quadrature of exp(alpha * m) over face jj
+        ret += std::exp(local_alpha * M_[jj][ll]) * quad_weights_[jj][ll];
+    } // jj (faces)
+    ret -= alpha * v; // subtract the product of the full vectors alpha and v
+    return ret;
+  } // ... calculate_f(...)
+
+  void calculate_u(const VectorType& alpha, VectorType& u) const // u = < m exp(alpha * m) >, u is overwritten
+  {
+    u *= 0.;
+    LocalVectorType local_alpha, local_u;
+    const auto& triangulation = basis_functions_.triangulation();
+    const auto& faces = triangulation.faces();
+    auto& work_vecs = get_work_vecs();
+    for (size_t jj = 0; jj < faces.size(); ++jj) {
+      const auto& face = faces[jj];
+      const auto& vertices = face->vertices();
+      local_u *= 0.; // reset the accumulator for this face
+      for (size_t ii = 0; ii < 3; ++ii) // gather the entries of alpha that belong to the three vertices of the face
+        local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto& basis_ll = M_[jj][ll];
+        work_vecs[jj][ll] = std::exp(local_alpha * basis_ll) * quad_weights_[jj][ll]; // kept for calculate_hessian
+        for (size_t ii = 0; ii < 3; ++ii)
+          local_u[ii] += basis_ll[ii] * work_vecs[jj][ll];
+      } // ll (quad points)
+      for (size_t ii = 0; ii < 3; ++ii) // scatter the face contribution to the global vertex indices
+        u.add_to_entry(vertices[ii]->index(), local_u[ii]);
+    } // jj (faces)
+  } // void calculate_u(...)
+
+  void calculate_gradient(const VectorType& alpha, const VectorType& v, VectorType& g_k) const // g_k = u(alpha) - v
+  {
+    calculate_u(alpha, g_k); // overwrites g_k and fills the work vecs with the weighted exponentials
+    g_k -= v;
+  }
+
+  void calculate_hessian(const VectorType& alpha, // multiplier, only read if the work vecs are recomputed
+                         const BasisValuesMatrixType& M, // indexed as M[face][quad point][local vertex]
+                         SparseMatrixType& H, // output, H = < m m^T exp(alpha * m) >, overwritten
+                         const bool use_work_vecs_results = false) const // true: reuse values left in the work vecs
+  {
+    H *= 0.;
+    LocalVectorType local_alpha;
+    LocalMatrixType H_local(0.);
+    const auto& triangulation = basis_functions_.triangulation();
+    const auto& faces = triangulation.faces();
+    auto& work_vecs = get_work_vecs();
+    for (size_t jj = 0; jj < faces.size(); ++jj) {
+      H_local *= 0.; // reset the accumulator for this face
+      const auto& face = faces[jj];
+      const auto& vertices = face->vertices();
+      for (size_t ii = 0; ii < 3; ++ii)
+        local_alpha[ii] = alpha.get_entry(vertices[ii]->index());
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto& basis_ll = M[jj][ll];
+        if (!use_work_vecs_results) // otherwise the caller guarantees the work vecs were filled for the same alpha
+          work_vecs[jj][ll] = std::exp(local_alpha * basis_ll) * quad_weights_[jj][ll];
+        for (size_t ii = 0; ii < 3; ++ii)
+          for (size_t kk = 0; kk < 3; ++kk)
+            H_local[ii][kk] += basis_ll[ii] * basis_ll[kk] * work_vecs[jj][ll];
+      } // ll (quad points)
+      for (size_t ii = 0; ii < 3; ++ii) // scatter the local 3x3 block to the global vertex indices
+        for (size_t kk = 0; kk < 3; ++kk)
+          H.add_to_entry(vertices[ii]->index(), vertices[kk]->index(), H_local[ii][kk]);
+    } // jj (faces)
+  } // void calculate_hessian(...)
+
+  // J = df/dalpha is the derivative of the flux with respect to alpha.
+  // As F = (f_1, f_2, f_3) is matrix-valued
+  // (div f = \sum_{i=1}^d \partial_{x_i} f_i  = \sum_{i=1}^d \partial_{x_i} < v_i m \hat{psi}(alpha) > is
+  // vector-valued),
+  // the derivative is the vector of matrices (df_1/dalpha, df_2/dalpha, ...)
+  // this function writes the dd-th matrix df_dd/dalpha of J to J_dd (J_dd is overwritten)
+  // assumes work_vecs already contains the needed exp(alpha * m) values multiplied by the quadrature weights
+  void calculate_J(const BasisValuesMatrixType& M, SparseMatrixType& J_dd, const size_t dd) const
+  {
+    assert(dd < basis_dimDomain);
+    J_dd *= 0.;
+    LocalMatrixType J_local(0.);
+    auto& work_vecs = get_work_vecs();
+    const auto& triangulation = basis_functions_.triangulation();
+    const auto& faces = triangulation.faces();
+    for (size_t jj = 0; jj < faces.size(); ++jj) {
+      J_local *= 0.; // reset the accumulator for this face
+      const auto& face = faces[jj];
+      const auto& vertices = face->vertices();
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto& basis_ll = M[jj][ll];
+        for (size_t ii = 0; ii < 3; ++ii)
+          for (size_t kk = 0; kk < 3; ++kk) // same integrand as the Hessian, times the dd-th quad point coordinate
+            J_local[ii][kk] += basis_ll[ii] * basis_ll[kk] * work_vecs[jj][ll] * quad_points_[jj][ll][dd];
+      } // ll (quad points)
+      for (size_t ii = 0; ii < 3; ++ii) // scatter the local 3x3 block to the global vertex indices
+        for (size_t kk = 0; kk < 3; ++kk)
+          J_dd.add_to_entry(vertices[ii]->index(), vertices[kk]->index(), J_local[ii][kk]);
+    } // jj (faces)
+  } // void calculate_J(...)
+
+  const MomentBasis& basis_functions_;
+  QuadraturePointsType quad_points_;
+  QuadratureWeightsType quad_weights_;
+  BasisValuesMatrixType M_;
+  const RangeFieldType tau_;
+  const RangeFieldType epsilon_gamma_;
+  const RangeFieldType chi_;
+  const RangeFieldType xi_;
+  const std::vector<RangeFieldType> r_sequence_;
+  const size_t k_0_;
+  const size_t k_max_;
+  const RangeFieldType epsilon_;
+  XT::LA::SparsityPatternDefault pattern_;
+};
+#endif
+
+#if 0
+/**
+ * Specialization of EntropyBasedFluxImplementation for 1D Hatfunctions (no change of basis, analytic integrals +
+ * Taylor)
+ */
+template <class D, class R, size_t dimRange>
+class EntropyBasedFluxImplementation<HatFunctionMomentBasis<D, 1, R, dimRange, 1>>
+  : public XT::Functions::FunctionInterface<dimRange, 1, dimRange, R>
+{
+  using BaseType = typename XT::Functions::FunctionInterface<dimRange, 1, dimRange, R>;
+  using ThisType = EntropyBasedFluxImplementation;
+
+public:
+  using MomentBasis = HatFunctionMomentBasis<D, 1, R, dimRange, 1>;
+  using BaseType::d;
+  using BaseType::r;
+  static const size_t basis_dimDomain = MomentBasis::dimDomain;
+  static const size_t basis_dimRange = dimRange;
+  using typename BaseType::DerivativeRangeReturnType;
+  using typename BaseType::DomainFieldType;
+  using typename BaseType::DomainType;
+  using typename BaseType::RangeFieldType;
+  using typename BaseType::RangeReturnType;
+  using typename BaseType::RowDerivativeRangeReturnType;
+  using BasisDomainType = typename MomentBasis::DomainType;
+  using MatrixType = XT::Common::FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>;
+  using VectorType = XT::Common::FieldVector<RangeFieldType, basis_dimRange>;
+  using AlphaReturnType = std::pair<VectorType, std::pair<DomainType, RangeFieldType>>;
+
+  explicit EntropyBasedFluxImplementation(const MomentBasis& basis_functions, // each argument initializes the member
+                                          const RangeFieldType tau, //           of the same name (with trailing _)
+                                          const RangeFieldType epsilon_gamma,
+                                          const RangeFieldType chi,
+                                          const RangeFieldType xi,
+                                          const std::vector<RangeFieldType> r_sequence, // taken by value
+                                          const size_t k_0,
+                                          const size_t k_max,
+                                          const RangeFieldType epsilon,
+                                          const RangeFieldType taylor_tol = 0.1,
+                                          const size_t max_taylor_order = 200)
+    : basis_functions_(basis_functions)
+    , v_points_(basis_functions_.triangulation()) // the only member not initialized directly from an argument
+    , tau_(tau)
+    , epsilon_gamma_(epsilon_gamma)
+    , chi_(chi)
+    , xi_(xi)
+    , r_sequence_(r_sequence)
+    , k_0_(k_0)
+    , k_max_(k_max)
+    , epsilon_(epsilon)
+    , taylor_tol_(taylor_tol) // threshold on |alpha[nn] - alpha[nn - 1]| checked in evaluate_with_alpha
+    , max_taylor_order_(max_taylor_order)
+  {}
+
+  // A moment vector is accepted iff every component is strictly positive and not infinite. NaN components are
+  // rejected as well, because the comparison `> 0.` evaluates to false for NaN.
+  static bool is_realizable(const DomainType& u)
+  {
+    for (const auto& component : u) {
+      const bool is_positive = component > 0.;
+      if (!is_positive || std::isinf(component))
+        return false;
+    }
+    return true;
+  }
+
+  // Polynomial order reported for this function; always 1, independent of the parameter.
+  int order(const XT::Common::Parameter& /*param*/) const override
+  {
+    return 1;
+  }
+
+  // Returns alpha_iso + alpha_iso_prime * log(density(u)), which is used as initial guess for get_alpha().
+  // NOTE(review): alpha_iso and alpha_iso_prime are function-local statics initialized from the basis of the first
+  // instance that calls this method, so all instances of this class share them - confirm that these vectors do not
+  // depend on the concrete basis object.
+  VectorType get_isotropic_alpha(const DomainType& u) const
+  {
+    static const auto alpha_iso = basis_functions_.alpha_iso();
+    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
+    const auto log_density = std::log(basis_functions_.density(u));
+    return alpha_iso + alpha_iso_prime * log_density;
+  }
+
+  // Evaluates the flux for the moment vector u: first solves for alpha (starting from the isotropic guess, with
+  // regularization enabled), then evaluates the flux for that alpha.
+  virtual RangeReturnType evaluate(const DomainType& u,
+                                   const XT::Common::Parameter& /*param*/ = {}) const override final
+  {
+    const auto alpha_and_info = get_alpha(u, get_isotropic_alpha(u), true);
+    return evaluate_with_alpha(alpha_and_info->first);
+  }
+
+  // Evaluates the flux < \mu m G_\alpha > for a given multiplier alpha and returns it in the first (and only) row of
+  // the result.
+  // For each index nn, the contributions of the two intervals [v_{nn-1}, v_nn] and [v_nn, v_{nn+1}] adjacent to the
+  // grid point v_nn are added. On each interval, a closed-form expression is used if the entries of alpha at the two
+  // end points differ by more than taylor_tol_. Otherwise, the closed-form expression (which divides by powers of
+  // that difference) is replaced by a Taylor series in the difference, truncated once the update compares equal to
+  // zero or the maximal number of terms is reached.
+  virtual RangeReturnType evaluate_with_alpha(const VectorType& alpha) const
+  {
+    RangeReturnType ret(0.);
+    // calculate < \mu m G_\alpha(u) >
+    for (size_t nn = 0; nn < dimRange; ++nn) {
+      // contribution of the interval to the left of v_nn (does not exist for the first grid point)
+      if (nn > 0) {
+        if (std::abs(alpha[nn] - alpha[nn - 1]) > taylor_tol_) {
+          ret[0][nn] +=
+              2. * std::pow(v_points_[nn] - v_points_[nn - 1], 2) / std::pow(alpha[nn] - alpha[nn - 1], 3)
+                  * (std::exp(alpha[nn]) - std::exp(alpha[nn - 1]))
+              + (v_points_[nn] - v_points_[nn - 1]) / std::pow(alpha[nn] - alpha[nn - 1], 2)
+                    * (v_points_[nn - 1] * (std::exp(alpha[nn]) + std::exp(alpha[nn - 1]))
+                       - 2 * v_points_[nn] * std::exp(alpha[nn]))
+              + v_points_[nn] * (v_points_[nn] - v_points_[nn - 1]) / (alpha[nn] - alpha[nn - 1]) * std::exp(alpha[nn]);
+        } else {
+          // Taylor series in base = alpha[nn] - alpha[nn - 1]; in iteration ll, pow_frac equals base^ll / (ll + 3)!
+          RangeFieldType update = 1.;
+          RangeFieldType result = 0.;
+          RangeFieldType base = alpha[nn] - alpha[nn - 1];
+          size_t ll = 0;
+          auto pow_frac = 1. / 6.;
+          // NOTE(review): unlike the loop for the right interval below, this loop does not enforce a minimal number
+          // of terms (ll < 3) - confirm that this difference is intended
+          while (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(update, 0.)) {
+            update = pow_frac * ((ll * ll + 3 * ll + 2) * v_points_[nn] + (ll + 1) * v_points_[nn - 1]);
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 3);
+          } // ll
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          ret[0][nn] += result * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha[nn - 1]);
+        }
+      }
+      // contribution of the interval to the right of v_nn (does not exist for the last grid point)
+      if (nn < dimRange - 1) {
+        if (std::abs(alpha[nn + 1] - alpha[nn]) > taylor_tol_) {
+          ret[0][nn] +=
+              -2. * std::pow(v_points_[nn + 1] - v_points_[nn], 2) / std::pow(alpha[nn + 1] - alpha[nn], 3)
+                  * (std::exp(alpha[nn + 1]) - std::exp(alpha[nn]))
+              + (v_points_[nn + 1] - v_points_[nn]) / std::pow(alpha[nn + 1] - alpha[nn], 2)
+                    * (v_points_[nn + 1] * (std::exp(alpha[nn + 1]) + std::exp(alpha[nn]))
+                       - 2 * v_points_[nn] * std::exp(alpha[nn]))
+              - v_points_[nn] * (v_points_[nn + 1] - v_points_[nn]) / (alpha[nn + 1] - alpha[nn]) * std::exp(alpha[nn]);
+        } else {
+          // Taylor series in base = alpha[nn + 1] - alpha[nn]; in iteration ll, pow_frac equals base^ll / (ll + 3)!
+          RangeFieldType update = 1.;
+          RangeFieldType result = 0.;
+          RangeFieldType base = alpha[nn + 1] - alpha[nn];
+          size_t ll = 0;
+          auto pow_frac = 1. / 6.;
+          // at least three terms are always summed up, even if one of the first updates is zero
+          while (ll < 3 || (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(update, 0.))) {
+            update = pow_frac * (2 * v_points_[nn] + (ll + 1) * v_points_[nn + 1]);
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 3);
+          } // ll
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          ret[0][nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha[nn]);
+        }
+      } // if (nn < dimRange - 1)
+    } // nn
+    return ret;
+  } // ... evaluate_with_alpha(...)
+
+  // Evaluates the jacobian of the flux for the moment vector u: first solves for alpha (starting from the isotropic
+  // guess, with regularization enabled), then evaluates the jacobian for that alpha.
+  virtual DerivativeRangeReturnType jacobian(const DomainType& u,
+                                             const XT::Common::Parameter& /*param*/ = {}) const override final
+  {
+    const auto alpha_and_info = get_alpha(u, get_isotropic_alpha(u), true);
+    return jacobian_with_alpha(alpha_and_info->first);
+  }
+
+  // Evaluates the jacobian of the flux for a given multiplier alpha. Both H and J are handled via their diagonal and
+  // subdiagonal entries only; the result is written to the first (and only) block of the return value.
+  // NOTE(review): presumably calculate_J_Hinv computes J * H^{-1} - verify against its definition.
+  virtual DerivativeRangeReturnType jacobian_with_alpha(const VectorType& alpha) const
+  {
+    using SubdiagonalType = XT::Common::FieldVector<RangeFieldType, dimRange - 1>;
+    // diagonal and subdiagonal of H
+    VectorType H_diag;
+    SubdiagonalType H_subdiag;
+    calculate_hessian(alpha, H_diag, H_subdiag);
+    // diagonal and subdiagonal of J
+    VectorType J_diag;
+    SubdiagonalType J_subdiag;
+    calculate_J(alpha, J_diag, J_subdiag);
+    // combine both
+    DerivativeRangeReturnType jac;
+    calculate_J_Hinv(jac[0], J_diag, J_subdiag, H_diag, H_subdiag);
+    return jac;
+  }
+
+  // Calculates the kinetic flux \sum_{i=1}^d < v_i m \psi > n_i through the interface between two cells with moment
+  // vectors u_i and u_j, where n is the unit outer normal, m is the basis function vector and \psi is the ansatz
+  // distribution (taken from u_i or u_j, depending on the sign of the velocity in direction of the normal).
+  // As we are using cartesian grids, n_i == 0 in all but one dimension, so only evaluate for i == dd.
+  // The multipliers for both moment vectors are computed by get_alpha (starting from the isotropic guess, with
+  // regularization enabled), so this function throws whenever get_alpha throws.
+  DomainType evaluate_kinetic_flux(const DomainType& u_i,
+                                   const DomainType& u_j,
+                                   const BasisDomainType& n_ij,
+                                   const size_t dd) const
+  {
+    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
+    const auto alpha_i = get_alpha(u_i, get_isotropic_alpha(u_i), true)->first;
+    const auto alpha_j = get_alpha(u_j, get_isotropic_alpha(u_j), true)->first;
+    // The result has to be returned explicitly: flowing off the end of a value-returning function is undefined
+    // behavior.
+    return evaluate_kinetic_flux_with_alphas(alpha_i, alpha_j, n_ij, dd);
+  } // DomainType evaluate_kinetic_flux(...)
+
+  // Calculates the kinetic flux < \mu m G_\alpha > * n_ij[0] for given multipliers alpha_i and alpha_j of the two
+  // cells adjacent to the interface. Only dd == 0 is supported (checked by an assertion).
+  // For each index nn, the contributions of the intervals [v_{nn-1}, v_nn] and [v_nn, v_{nn+1}] are added.
+  // For all intervals except the middle one of an even number of grid points, the multiplier is chosen by the sign of
+  // n_ij[0] times the midpoint of the interval: alpha_i is used if that product is positive, alpha_j otherwise.
+  // If dimRange is even, the interval between the grid points dimRange/2 - 1 and dimRange/2 is treated separately
+  // using both multipliers (NOTE(review): presumably this interval contains \mu = 0 and is split there, the
+  // expressions below only use the squared end point and the exponential of the mean of the two alpha entries -
+  // confirm against the derivation).
+  // As in evaluate_with_alpha, a closed-form expression is used if the two involved alpha entries differ by more
+  // than taylor_tol_, and a truncated Taylor series otherwise.
+  DomainType evaluate_kinetic_flux_with_alphas(const VectorType& alpha_i,
+                                               const VectorType& alpha_j,
+                                               const BasisDomainType& n_ij,
+                                               const size_t dd) const
+  {
+    assert(dd == 0);
+    // calculate < \mu m G_\alpha(u) > * n_ij
+    DomainType ret(0);
+    for (size_t nn = 0; nn < dimRange; ++nn) {
+      // contribution of the interval to the left of v_nn
+      if (nn > 0) {
+        if (dimRange % 2 || nn != dimRange / 2) {
+          // regular interval, choose multiplier by the sign of the velocity in direction of the normal
+          const auto& alpha = (n_ij[0] * (v_points_[nn - 1] + v_points_[nn]) / 2. > 0.) ? alpha_i : alpha_j;
+          if (std::abs(alpha[nn] - alpha[nn - 1]) > taylor_tol_) {
+            ret[nn] += 2. * std::pow(v_points_[nn] - v_points_[nn - 1], 2) / std::pow(alpha[nn] - alpha[nn - 1], 3)
+                           * (std::exp(alpha[nn]) - std::exp(alpha[nn - 1]))
+                       + (v_points_[nn] - v_points_[nn - 1]) / std::pow(alpha[nn] - alpha[nn - 1], 2)
+                             * (v_points_[nn - 1] * (std::exp(alpha[nn]) + std::exp(alpha[nn - 1]))
+                                - 2 * v_points_[nn] * std::exp(alpha[nn]))
+                       + v_points_[nn] * (v_points_[nn] - v_points_[nn - 1]) / (alpha[nn] - alpha[nn - 1])
+                             * std::exp(alpha[nn]);
+          } else {
+            // Taylor series in base = alpha[nn - 1] - alpha[nn] (note the sign, the series is scaled by
+            // exp(alpha[nn]) below); in iteration ll, pow_frac equals base^ll / (ll + 3)!
+            RangeFieldType update = 1.;
+            RangeFieldType result = 0.;
+            RangeFieldType base = alpha[nn - 1] - alpha[nn];
+            size_t ll = 0;
+            auto pow_frac = 1. / 6.;
+            while (ll < 3 || (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(update, 0.))) {
+              update = pow_frac * (2 * v_points_[nn] + (ll + 1) * v_points_[nn - 1]);
+              result += update;
+              ++ll;
+              pow_frac *= base / (ll + 3);
+            } // ll
+            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+            ret[nn] += result * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha[nn]);
+          }
+        } else { //  if (dimRange % 2 || nn != dimRange/2)
+          // middle interval for even dimRange, both multipliers contribute
+          const auto& alpha_pos = n_ij[0] > 0. ? alpha_i : alpha_j;
+          const auto& alpha_neg = n_ij[0] > 0. ? alpha_j : alpha_i;
+          // part using alpha_neg
+          if (std::abs(alpha_neg[nn] - alpha_neg[nn - 1]) > taylor_tol_) {
+            ret[nn] += -2. * std::pow(v_points_[nn], 2)
+                       * (4. / std::pow(alpha_neg[nn - 1] - alpha_neg[nn], 3)
+                              * (std::exp((alpha_neg[nn] + alpha_neg[nn - 1]) / 2.) - std::exp(alpha_neg[nn - 1]))
+                          + 1. / std::pow(alpha_neg[nn - 1] - alpha_neg[nn], 2)
+                                * (std::exp((alpha_neg[nn] + alpha_neg[nn - 1]) / 2.) + std::exp(alpha_neg[nn - 1])));
+
+          } else {
+            RangeFieldType update = 1.;
+            RangeFieldType result = 0.;
+            RangeFieldType base = alpha_neg[nn] - alpha_neg[nn - 1];
+            size_t ll = 2;
+            auto pow_frac = 1. / 24.;
+            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
+              update = pow_frac * (ll - 1.);
+              result += update;
+              ++ll;
+              pow_frac *= base / (2. * (ll + 1));
+            } // ll
+            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+            ret[nn] += result * -2. * std::pow(v_points_[nn], 2) * std::exp(alpha_neg[nn - 1]);
+          }
+          // part using alpha_pos
+          if (std::abs(alpha_pos[nn] - alpha_pos[nn - 1]) > taylor_tol_) {
+            ret[nn] += 2. * std::pow(v_points_[nn], 2)
+                       * (4. / std::pow(alpha_pos[nn - 1] - alpha_pos[nn], 3)
+                              * (std::exp((alpha_pos[nn] + alpha_pos[nn - 1]) / 2.) - std::exp(alpha_pos[nn]))
+                          + 1. / std::pow(alpha_pos[nn - 1] - alpha_pos[nn], 2)
+                                * (std::exp((alpha_pos[nn] + alpha_pos[nn - 1]) / 2.) - 3. * std::exp(alpha_pos[nn]))
+                          - 1. / (alpha_pos[nn - 1] - alpha_pos[nn]) * std::exp(alpha_pos[nn]));
+          } else {
+            RangeFieldType update = 1.;
+            RangeFieldType result = 0.;
+            RangeFieldType base = alpha_pos[nn - 1] - alpha_pos[nn];
+            auto pow_frac = 1. / 24.;
+            size_t ll = 2;
+            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
+              update = pow_frac * (ll + 3);
+              result += update;
+              ++ll;
+              pow_frac *= base / (2. * (ll + 1));
+            } // ll
+            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+            ret[nn] += result * 2. * std::pow(v_points_[nn], 2) * std::exp(alpha_pos[nn]);
+          } // else (alpha_n - alpha_{n-1} != 0)
+        } // else (dimRange % 2 || nn != dimRange/2)
+      } // if (nn > 0)
+      // contribution of the interval to the right of v_nn
+      if (nn < dimRange - 1) {
+        if (dimRange % 2 || nn != dimRange / 2 - 1) {
+          // regular interval, choose multiplier by the sign of the velocity in direction of the normal
+          const auto& alpha = (n_ij[0] * (v_points_[nn] + v_points_[nn + 1]) / 2. > 0.) ? alpha_i : alpha_j;
+          // NOTE(review): this branch uses FloatCmp::ne with tolerances (0., taylor_tol_) instead of the std::abs
+          // comparison used in all other branches - confirm that both checks are meant to be equivalent
+          if (XT::Common::FloatCmp::ne(alpha[nn + 1], alpha[nn], 0., taylor_tol_)) {
+            ret[nn] += -2. * std::pow(v_points_[nn + 1] - v_points_[nn], 2) / std::pow(alpha[nn + 1] - alpha[nn], 3)
+                           * (std::exp(alpha[nn + 1]) - std::exp(alpha[nn]))
+                       + (v_points_[nn + 1] - v_points_[nn]) / std::pow(alpha[nn + 1] - alpha[nn], 2)
+                             * (v_points_[nn + 1] * (std::exp(alpha[nn + 1]) + std::exp(alpha[nn]))
+                                - 2 * v_points_[nn] * std::exp(alpha[nn]))
+                       - v_points_[nn] * (v_points_[nn + 1] - v_points_[nn]) / (alpha[nn + 1] - alpha[nn])
+                             * std::exp(alpha[nn]);
+          } else {
+            // Taylor series in base = alpha[nn + 1] - alpha[nn]; in iteration ll, pow_frac equals base^ll / (ll + 3)!
+            RangeFieldType update = 1.;
+            RangeFieldType result = 0.;
+            RangeFieldType base = alpha[nn + 1] - alpha[nn];
+            size_t ll = 0;
+            auto pow_frac = 1. / 6.;
+            // here, the series is truncated once adding the update does not change the result anymore (up to the
+            // given tolerance) instead of checking the update against zero
+            while (ll < 3
+                   || (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(result, result + update, 1e-16, 0.))) {
+              update = pow_frac * (2 * v_points_[nn] + (ll + 1) * v_points_[nn + 1]);
+              result += update;
+              ++ll;
+              pow_frac *= base / (ll + 3);
+            } // ll
+            ret[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha[nn]);
+          }
+        } else { // if (dimRange % 2 || nn != dimRange / 2 - 1)
+          // middle interval for even dimRange, both multipliers contribute
+          const auto& alpha_pos = n_ij[0] > 0. ? alpha_i : alpha_j;
+          const auto& alpha_neg = n_ij[0] > 0. ? alpha_j : alpha_i;
+          // part using alpha_neg
+          if (std::abs(alpha_neg[nn + 1] - alpha_neg[nn]) > taylor_tol_) {
+            ret[nn] += -2. * std::pow(v_points_[nn + 1], 2)
+                       * (-4. / std::pow(alpha_neg[nn + 1] - alpha_neg[nn], 3)
+                              * (std::exp(alpha_neg[nn]) - std::exp((alpha_neg[nn + 1] + alpha_neg[nn]) / 2.))
+                          - 1. / std::pow(alpha_neg[nn + 1] - alpha_neg[nn], 2)
+                                * (3 * std::exp(alpha_neg[nn]) - std::exp((alpha_neg[nn + 1] + alpha_neg[nn]) / 2.))
+                          - 1. / (alpha_neg[nn + 1] - alpha_neg[nn]) * std::exp(alpha_neg[nn]));
+          } else {
+            RangeFieldType update = 1.;
+            RangeFieldType result = 0.;
+            RangeFieldType base = alpha_neg[nn + 1] - alpha_neg[nn];
+            auto pow_frac = 1. / 24.;
+            size_t ll = 2;
+            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
+              update = pow_frac * (ll + 3);
+              result += update;
+              ++ll;
+              pow_frac *= base / (2. * (ll + 1));
+            } // ll
+            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+            ret[nn] += result * -2. * std::pow(v_points_[nn + 1], 2) * std::exp(alpha_neg[nn]);
+          }
+          // part using alpha_pos
+          if (std::abs(alpha_pos[nn + 1] - alpha_pos[nn]) > taylor_tol_) {
+            ret[nn] += 2. * std::pow(v_points_[nn + 1], 2)
+                       * (4. / std::pow(alpha_pos[nn + 1] - alpha_pos[nn], 3)
+                              * (std::exp((alpha_pos[nn + 1] + alpha_pos[nn]) / 2.) - std::exp(alpha_pos[nn + 1]))
+                          + 1. / std::pow(alpha_pos[nn + 1] - alpha_pos[nn], 2)
+                                * (std::exp((alpha_pos[nn + 1] + alpha_pos[nn]) / 2.) + std::exp(alpha_pos[nn + 1])));
+          } else {
+            RangeFieldType update = 1.;
+            RangeFieldType result = 0.;
+            RangeFieldType base = alpha_pos[nn] - alpha_pos[nn + 1];
+            auto pow_frac = 1. / 24.;
+            size_t ll = 2;
+            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
+              update = pow_frac * (ll - 1.);
+              result += update;
+              ++ll;
+              pow_frac *= base / (2. * (ll + 1));
+            } // ll
+            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+            ret[nn] += result * 2. * std::pow(v_points_[nn + 1], 2) * std::exp(alpha_pos[nn + 1]);
+          } // else (alpha_n - alpha_{n-1} != 0)
+        } // else (dimRange % 2 || nn != dimRange / 2 - 1)
+      } // if (nn < dimRange - 1)
+    } // nn
+    // scale by the (only non-zero) component of the normal
+    ret *= n_ij[0];
+    return ret;
+  } // DomainType evaluate_kinetic_flux_with_alphas(...)
 
-            const auto& alpha_tilde = alpha_k;
-            auto& u_alpha_tilde = tmp_vec;
-            u_alpha_tilde = g_k;
-            u_alpha_tilde += v;
-            auto density_tilde = basis_functions_.density(u_alpha_tilde);
-            if (!(density_tilde > 0.) || std::isinf(density_tilde))
-              break;
-            alpha_prime = alpha_iso;
-            alpha_prime *= -std::log(density_tilde);
-            alpha_prime += alpha_tilde;
-            auto& u_eps_diff = tmp_vec;
-            calculate_vector_integral(alpha_prime, M_, M_, u_eps_diff);
-            u_eps_diff *= -(1 - epsilon_gamma_);
-            u_eps_diff += v;
 
-            first_error_cond = g_k.two_norm();
-            second_error_cond = std::exp(d_k.one_norm() + std::abs(std::log(density_tilde)));
-            if (first_error_cond < tau_prime && 1 - epsilon_gamma_ < second_error_cond
-                && realizability_helper_.is_realizable(u_eps_diff, kk == static_cast<size_t>(0.8 * k_0_))) {
-              ret->first = alpha_prime + alpha_iso * std::log(density);
-              ret->second = r;
-              cache_.insert(v, alpha_prime);
-              alpha_storage_[x_local] = ret->first;
-              return ret;
-            } else {
-              RangeFieldType zeta_k = 1;
-              // backtracking line search
-              auto& alpha_new = tmp_vec;
-              while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm()) {
-                // calculate alpha_new = alpha_k + zeta_k d_k
-                alpha_new = d_k;
-                alpha_new *= zeta_k;
-                alpha_new += alpha_k;
-                // calculate f(alpha_new)
-                RangeFieldType f_new = calculate_scalar_integral(alpha_new, M_);
-                f_new -= alpha_new * v_k;
-                if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
-                  alpha_k = alpha_new;
-                  f_k = f_new;
-                  pure_newton = 0;
-                  break;
-                }
-                zeta_k = chi_ * zeta_k;
-              } // backtracking linesearch while
-              // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
-              if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm())
-                ++pure_newton;
-            } // else (stopping conditions)
-          } // k loop (Newton iterations)
-        } // r loop (Regularization parameter)
-        mutex_.unlock();
-        const std::string err_msg =
-            "Failed to converge for " + XT::Common::to_string(u) + " with density " + XT::Common::to_string(density)
-            + " and multiplier " + XT::Common::to_string(alpha_k) + " at position "
-            + XT::Common::to_string(entity().geometry().center()) + " due to too many iterations! Last u_eps_diff = "
-            + XT::Common::to_string(u_eps_diff) + ", first_error_cond = " + XT::Common::to_string(first_error_cond)
-            + ", second_error_cond = " + XT::Common::to_string(second_error_cond)
-            + ", tau_prime = " + XT::Common::to_string(tau_prime);
-        DUNE_THROW(MathError, err_msg);
-      } // else ( value has not been calculated before )
-    }
+  /**
+   * \brief Computes the multiplier alpha for the moment vector u by a Newton method with backtracking line search.
+   *
+   * The problem is rescaled such that the density is 1. The regularization parameters r are tried one after
+   * another; for r > 0, the rescaled moment vector is replaced by the convex combination (1 - r) u' + r u_iso
+   * with the isotropic moment vector u_iso.
+   *
+   * \param u          moment vector, needs to have a positive and finite density
+   * \param alpha_in   initial guess for alpha (with respect to the unscaled u)
+   * \param regularize if false, only r = 0 is tried, otherwise the regularization parameters in r_sequence_
+   * \return (alpha, (actual_u, r)), where r is the regularization parameter and actual_u the regularized u
+   * \throws Dune::MathError if the density of u is not positive and finite, if the linear system for the Newton
+   *         direction cannot be solved for the last regularization parameter or if the iteration does not
+   *         converge for any regularization parameter
+   */
+  std::unique_ptr<AlphaReturnType>
+  get_alpha(const DomainType& u, const VectorType& alpha_in, const bool regularize) const
+  {
+    auto ret = std::make_unique<AlphaReturnType>();
+    // rescale u such that the density <psi> is 1
+    RangeFieldType density = basis_functions_.density(u);
+    if (!(density > 0.) || std::isinf(density))
+      DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
+    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
+    VectorType u_prime = u / density;
+    VectorType alpha_initial = alpha_in - alpha_iso_prime * std::log(density);
+    RangeFieldType tau_prime =
+        std::min(tau_ / ((1 + std::sqrt(dimRange) * u_prime.two_norm()) * density + std::sqrt(dimRange) * tau_), tau_);
+    // The hessian H is always symmetric and tridiagonal, so we only need to store the diagonal and subdiagonal
+    // elements
+    VectorType H_diag;
+    FieldVector<RangeFieldType, dimRange - 1> H_subdiag;
 
-    virtual size_t order(const XT::Common::Parameter& /*param*/) const override
-    {
-      return 1;
-    }
+    // calculate moment vector for isotropic distribution
+    VectorType u_iso = basis_functions_.u_iso();
+    VectorType v;
+    VectorType alpha_k = alpha_initial;
+    // Without regularization, only r = 0 is tried. Selecting between two existing vectors avoids constructing a
+    // temporary vector in each call (assuming r_sequence_ is a std::vector<RangeFieldType>).
+    static const std::vector<RangeFieldType> no_regularization{0.};
+    const auto& r_sequence = regularize ? r_sequence_ : no_regularization;
+    const auto r_max = r_sequence.back();
+    // iterate over the selected sequence (not over the member r_sequence_), otherwise regularize == false would be
+    // ignored and r_max would not match the values of r
+    for (const auto& r : r_sequence) {
+      // regularize u
+      v = u_prime;
+      if (r > 0) {
+        // NOTE(review): the initial guess is computed from the unscaled u although the rescaled problem is solved
+        // here - confirm that this is intended
+        alpha_k = get_isotropic_alpha(u);
+        VectorType r_times_u_iso(u_iso);
+        r_times_u_iso *= r;
+        v *= 1 - r;
+        v += r_times_u_iso;
+      }
 
-    virtual void evaluate(const DomainType& x_local,
-                          const StateRangeType& u,
-                          RangeType& ret,
-                          const XT::Common::Parameter& param) const override
-    {
-      ColRangeType col_ret;
-      for (size_t dd = 0; dd < dimDomain; ++dd) {
-        evaluate_col(dd, x_local, u, col_ret, param);
-        for (size_t ii = 0; ii < dimRange; ++ii)
-          helper<dimDomain>::get_ref(ret, ii, dd) = col_ret[ii];
-      } // dd
-    } // void evaluate(...)
-
-    virtual void evaluate_col(const size_t col,
-                              const DomainType& x_local,
-                              const StateRangeType& u,
-                              ColRangeType& ret,
-                              const XT::Common::Parameter& param) const override
-    {
-      std::fill(ret.begin(), ret.end(), 0.);
-      const auto alpha = get_alpha(x_local, u, param, true)->first;
-      auto& work_vecs = working_storage();
-      calculate_scalar_products(alpha, M_, work_vecs);
-      apply_exponential(work_vecs);
-      // calculate ret[ii] = < omega[ii] m G_\alpha(u) >
-      for (size_t ll = 0; ll < quad_weights_.size(); ++ll) {
-        const auto factor = work_vecs[ll] * quad_weights_[ll] * quad_points_[ll][col];
-        for (size_t ii = 0; ii < dimRange; ++ii)
-          ret[ii] += M_.get_entry(ll, ii) * factor;
-      } // ll
-    } // void evaluate_col(...)
+      // calculate f_0
+      RangeFieldType f_k = calculate_f(alpha_k, v);
 
-    virtual void partial_u(const DomainType& x_local,
-                           const StateRangeType& /*u*/,
-                           PartialURangeType& ret,
-                           const XT::Common::Parameter& /*param*/) const override
-    {
-      const auto alpha = get_stored_alpha(x_local);
-      thread_local auto H = XT::Common::make_unique<MatrixType>();
-      calculate_hessian(alpha, M_, *H);
-      helper<dimDomain>::partial_u(M_, *H, ret, this);
-    }
+      // counts consecutive line search failures, after two failures a full Newton step is forced
+      int pure_newton = 0;
+      for (size_t kk = 0; kk < k_max_; ++kk) {
+        // exit inner for loop to increase r if too many iterations are used
+        if (kk > k_0_ && r < r_max)
+          break;
+        // calculate gradient g
+        VectorType g_k = calculate_gradient(alpha_k, v);
+        // calculate Hessian H
+        calculate_hessian(alpha_k, H_diag, H_subdiag);
+        // calculate descent direction d_k;
+        VectorType d_k(0), minus_g_k(g_k);
+        minus_g_k *= -1;
+        try {
+          d_k = minus_g_k;
+          XT::LA::solve_sym_tridiag_posdef(H_diag, H_subdiag, d_k);
+        } catch (const Dune::MathError&) {
+          // try the next regularization parameter if there is one left
+          if (r < r_max)
+            break;
+          else
+            DUNE_THROW(Dune::MathError, "Failure to converge!");
+        }
 
-    virtual void partial_u_col(const size_t col,
-                               const DomainType& x_local,
-                               const StateRangeType& /*u*/,
-                               ColPartialURangeType& ret,
-                               const XT::Common::Parameter& /*param*/) const override
-    {
-      const auto alpha = get_stored_alpha(x_local);
-      thread_local auto H = XT::Common::make_unique<MatrixType>();
-      calculate_hessian(alpha, M_, *H);
-      partial_u_col_helper(col, M_, *H, ret);
-    }
+        const auto& alpha_tilde = alpha_k;
+        const auto u_alpha_tilde = g_k + v;
+        auto density_tilde = basis_functions_.density(u_alpha_tilde);
+        if (!(density_tilde > 0.) || std::isinf(density_tilde))
+          break;
+        const auto alpha_prime = alpha_tilde - alpha_iso_prime * std::log(density_tilde);
+        const auto u_alpha_prime = calculate_u(alpha_prime);
+        auto u_eps_diff = v - u_alpha_prime * (1 - epsilon_gamma_);
+        // checking realizability is cheap so we do not need the second stopping criterion
+        if (g_k.two_norm() < tau_prime && is_realizable(u_eps_diff)) {
+          // undo the rescaling
+          ret->first = alpha_prime + alpha_iso_prime * std::log(density);
+          ret->second = std::make_pair(v * density, r);
+          return ret;
+        } else {
+          RangeFieldType zeta_k = 1;
+          // backtracking line search
+          while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm()) {
+            // while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.) {
+            // calculate alpha_new = alpha_k + zeta_k d_k
+            auto alpha_new = d_k;
+            alpha_new *= zeta_k;
+            alpha_new += alpha_k;
+            // calculate f(alpha_new)
+            RangeFieldType f_new = calculate_f(alpha_new, v);
+            if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
+              alpha_k = alpha_new;
+              f_k = f_new;
+              pure_newton = 0;
+              break;
+            }
+            zeta_k = chi_ * zeta_k;
+          } // backtracking linesearch while
+          // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
+          if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm())
+            ++pure_newton;
+        } // else (stopping conditions)
+      } // k loop (Newton iterations)
+    } // r loop (Regularization parameter)
+    DUNE_THROW(MathError, "Failed to converge");
 
-    static std::string static_id()
-    {
-      return "gdt.entropybasedlocalflux";
-    }
+    return ret;
+  } // ... get_alpha(...)
 
-  private:
-    template <size_t domainDim = dimDomain, class anything = void>
-    struct helper
-    {
-      static void partial_u(const BasisValuesMatrixType& M,
-                            MatrixType& H,
-                            PartialURangeType& ret,
-                            const Localfunction* entropy_flux)
-      {
-        for (size_t dd = 0; dd < domainDim; ++dd)
-          entropy_flux->partial_u_col_helper(dd, M, H, ret[dd], dd > 0);
-      } // void partial_u(...)
-
-      static RangeFieldType& get_ref(RangeType& ret, const size_t rr, const size_t cc)
-      {
-        return ret[rr][cc];
-      }
-    }; // class helper<...>
+  // Read-only access to the moment basis stored in this object.
+  const MomentBasis& basis_functions() const
+  {
+    return basis_functions_;
+  }
 
-    template <class anything>
-    struct helper<1, anything>
-    {
-      static void partial_u(const BasisValuesMatrixType& M,
-                            MatrixType& H,
-                            PartialURangeType& ret,
-                            const Localfunction* entropy_flux)
-      {
-        entropy_flux->partial_u_col_helper(0, M, H, ret, false);
-      } // void partial_u(...)
-
-      static RangeFieldType& get_ref(RangeType& ret, const size_t rr, const size_t DXTC_DEBUG_ONLY(cc))
-      {
-        assert(cc == 0);
-        return ret[rr];
+private:
+  // Evaluates the objective function of the optimization problem solved in get_alpha: the sum over all intervals
+  // [v_ii, v_{ii+1}] of an expression in exp(alpha_k), minus the scalar product alpha_k * v.
+  // On each interval, the closed-form expression is used if the two alpha entries differ by more than taylor_tol_.
+  // Otherwise, a truncated Taylor series in the difference is used to avoid dividing by a small number.
+  RangeFieldType calculate_f(const VectorType& alpha_k, const VectorType& v) const
+  {
+    RangeFieldType ret(0);
+    for (size_t ii = 0; ii < dimRange - 1; ++ii) {
+      if (std::abs(alpha_k[ii + 1] - alpha_k[ii]) > taylor_tol_) {
+        ret += (v_points_[ii + 1] - v_points_[ii]) / (alpha_k[ii + 1] - alpha_k[ii])
+               * (std::exp(alpha_k[ii + 1]) - std::exp(alpha_k[ii]));
+      } else {
+        // Taylor series in base = alpha_k[ii + 1] - alpha_k[ii]; in each iteration, pow_frac equals
+        // base^(ll - 1) / ll!
+        RangeFieldType update = 1.;
+        RangeFieldType result = 0.;
+        size_t ll = 1;
+        RangeFieldType base = alpha_k[ii + 1] - alpha_k[ii];
+        auto pow_frac = 1.;
+        // stop once the update compares equal to zero or the maximal number of terms is reached
+        while (ll <= max_taylor_order_ && XT::Common::FloatCmp::ne(update, 0.)) {
+          update = pow_frac;
+          result += update;
+          ++ll;
+          pow_frac *= base / ll;
+        }
+        assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+        ret += result * (v_points_[ii + 1] - v_points_[ii]) * std::exp(alpha_k[ii]);
       }
-    }; // class helper<1, ...>
-
-    void partial_u_col_helper(const size_t col,
-                              const BasisValuesMatrixType& M,
-                              MatrixType& H,
-                              ColPartialURangeType& ret,
-                              bool L_calculated = false) const
-    {
-      assert(col < dimDomain);
-      calculate_J(M, ret, col);
-      calculate_A_Binv(ret, H, L_calculated);
-    } // void partial_u_col(...)
-
-    // calculates A = A B^{-1}. B is assumed to be symmetric positive definite.
-    static void calculate_A_Binv(ColPartialURangeType& A, MatrixType& B, bool L_calculated = false)
-    {
-      // if B = LL^T, then we have to calculate ret = A (L^T)^{-1} L^{-1} = C L^{-1}
-      // calculate B = LL^T first
-      if (!L_calculated)
-        XT::LA::cholesky(B);
-      VectorType tmp_vec;
-      for (size_t ii = 0; ii < dimRange; ++ii) {
-        // calculate C = A (L^T)^{-1} and store in B
-        XT::LA::solve_lower_triangular(B, tmp_vec, A[ii]);
-        // calculate ret = C L^{-1}
-        XT::LA::solve_lower_triangular_transposed(B, A[ii], tmp_vec);
-      } // ii
-    } // void calculate_A_Binv(...)
+    } // ii
+    // subtract the scalar product of alpha_k and v
+    ret -= alpha_k * v;
+    return ret;
+  } // .. calculate_f(...)
 
-    void calculate_hessian(const VectorType& alpha, const BasisValuesMatrixType& M, MatrixType& H, const bool use_work_vec_data = false) const
-    {
-      std::fill(H.begin(), H.end(), 0.);
-      auto& work_vec = working_storage();
-      if (!use_work_vec_data) {
-        calculate_scalar_products(alpha, M, work_vec);
-        apply_exponential(work_vec);
-      }
-      const size_t num_quad_points = quad_weights_.size();
-      // matrix is symmetric, we only use lower triangular part
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        auto factor_ll = work_vec[ll] * quad_weights_[ll];
-        const auto* basis_ll = M.get_ptr(ll);
-        for (size_t ii = 0; ii < dimRange; ++ii) {
-          auto* H_row = &(H[ii][0]);
-          const auto factor_ll_ii = basis_ll[ii] * factor_ll;
-          if (!XT::Common::is_zero(factor_ll_ii)) {
-            for (size_t kk = 0; kk <= ii; ++kk) {
-              H_row[kk] += basis_ll[kk] * factor_ll_ii;
-            } // kk
+  VectorType calculate_u(const VectorType& alpha_k) const // each entry sums a left- and a right-neighbour term
+  {
+    VectorType u(0); // starts at zero, the terms below are accumulated with +=
+    for (size_t nn = 0; nn < dimRange; ++nn) {
+      if (nn > 0) { // term involving the left neighbour nn - 1
+        if (std::abs(alpha_k[nn] - alpha_k[nn - 1]) > taylor_tol_) {
+          u[nn] += -(v_points_[nn] - v_points_[nn - 1]) / std::pow(alpha_k[nn] - alpha_k[nn - 1], 2)
+                       * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1]))
+                   + (v_points_[nn] - v_points_[nn - 1]) / (alpha_k[nn] - alpha_k[nn - 1]) * std::exp(alpha_k[nn]);
+        } else { // alpha difference within taylor_tol_: sum a truncated series instead of dividing by it
+          RangeFieldType result = 0.;
+          RangeFieldType base = alpha_k[nn - 1] - alpha_k[nn]; // sign: left neighbour minus current entry
+          size_t ll = 0;
+          RangeFieldType update = 1;
+          RangeFieldType pow_frac = 0.5; // first term, 1 / 2!
+          while (ll <= max_taylor_order_ - 2 && XT::Common::FloatCmp::ne(update, 0.)) {
+            update = pow_frac; // ll-th term: base^ll / (ll + 2)!
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 2);
           }
-        } // ii
-      } // ll
-    } // void calculate_hessian(...)
-
-    // J = df/dalpha is the derivative of the flux with respect to alpha.
-    // As F = (f_1, f_2, f_3) is matrix-valued
-    // (div f = \sum_{i=1}^d \partial_{x_i} f_i  = \sum_{i=1}^d \partial_{x_i} < v_i m \hat{psi}(alpha) > is
-    // vector-valued),
-    // the derivative is the vector of matrices (df_1/dalpha, df_2/dalpha, ...)
-    // this function returns the dd-th matrix df_dd/dalpha of J
-    // assumes work_vecs already contains the needed exp(alpha * m) values
-    void calculate_J(const BasisValuesMatrixType& M,
-                     Dune::FieldMatrix<RangeFieldType, dimRange, StateType::dimRange>& J_dd,
-                     const size_t dd) const
-    {
-      assert(dd < dimRangeCols);
-      const auto& work_vecs = working_storage();
-      std::fill(J_dd.begin(), J_dd.end(), 0);
-      const size_t num_quad_points = quad_points_.size();
-      for (size_t ll = 0; ll < num_quad_points; ++ll) {
-        const auto factor_ll = work_vecs[ll] * quad_points_[ll][dd] * quad_weights_[ll];
-        const auto* basis_ll = M.get_ptr(ll);
-        for (size_t ii = 0; ii < dimRange; ++ii) {
-          const auto factor_ll_ii = factor_ll * basis_ll[ii];
-          if (!XT::Common::is_zero(factor_ll_ii)) {
-            for (size_t kk = 0; kk <= ii; ++kk)
-              J_dd[ii][kk] += basis_ll[kk] * factor_ll_ii;
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          u[nn] += result * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn]);
+        }
+      } // if (nn > 0)
+      if (nn < dimRange - 1) { // term involving the right neighbour nn + 1
+        if (std::abs(alpha_k[nn + 1] - alpha_k[nn]) > taylor_tol_) {
+          u[nn] += (v_points_[nn + 1] - v_points_[nn]) / std::pow(alpha_k[nn + 1] - alpha_k[nn], 2)
+                       * (std::exp(alpha_k[nn + 1]) - std::exp(alpha_k[nn]))
+                   - (v_points_[nn + 1] - v_points_[nn]) / (alpha_k[nn + 1] - alpha_k[nn]) * std::exp(alpha_k[nn]);
+        } else { // same series as above, now with base = alpha_k[nn + 1] - alpha_k[nn]
+          RangeFieldType update = 1.;
+          RangeFieldType result = 0.;
+          size_t ll = 0;
+          RangeFieldType base = alpha_k[nn + 1] - alpha_k[nn];
+          auto pow_frac = 0.5;
+          while (ll <= max_taylor_order_ - 2 && XT::Common::FloatCmp::ne(update, 0.)) {
+            update = pow_frac; // ll-th term: base^ll / (ll + 2)!
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 2);
           }
-        } // ii
-      } // ll
-      // symmetric update for upper triangular part of J
-      for (size_t mm = 0; mm < dimRange; ++mm)
-        for (size_t nn = mm + 1; nn < dimRange; ++nn)
-          J_dd[mm][nn] = J_dd[nn][mm];
-    } // void calculate_J(...)
-
-    const BasisfunctionType& basis_functions_;
-    const std::vector<DomainType>& quad_points_;
-    const std::vector<RangeFieldType>& quad_weights_;
-    const BasisValuesMatrixType& M_;
-    const RangeFieldType tau_;
-    const RangeFieldType epsilon_gamma_;
-    const RangeFieldType chi_;
-    const RangeFieldType xi_;
-    const std::vector<RangeFieldType>& r_sequence_;
-    const size_t k_0_;
-    const size_t k_max_;
-    const RangeFieldType epsilon_;
-    const std::string name_;
-    // constructor)
-    LocalCacheType& cache_;
-    AlphaStorageType& alpha_storage_;
-    std::mutex& mutex_;
-    RealizabilityHelper<BasisfunctionType, true, true> realizability_helper_;
-  }; // class Localfunction
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          u[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha_k[nn]);
+        }
+      } // if (nn < dimRange-1)
+    } // nn
+    return u;
+  } // VectorType calculate_u(...)
 
-  static std::string static_id()
+  VectorType calculate_gradient(const VectorType& alpha_k, const VectorType& v) const
   {
-    return "gdt.entropybasedflux";
+    return calculate_u(alpha_k) - v; // the gradient is the mismatch between u(alpha_k) and v
   }
 
-  std::unique_ptr<LocalfunctionType> local_function(const EntityType& entity) const
+  void calculate_hessian(const VectorType& alpha_k,
+                         VectorType& diag, // out: diagonal of H
+                         FieldVector<RangeFieldType, dimRange - 1>& subdiag) const // out: subdiagonal of H
   {
-    return derived_local_function(entity);
-  }
+    std::fill(diag.begin(), diag.end(), 0.); // zeroed first, diag[nn] is accumulated with += further down
+    std::fill(subdiag.begin(), subdiag.end(), 0.);
+    for (size_t nn = 0; nn < dimRange; ++nn) {
+      if (nn > 0) { // left neighbour: sets subdiag[nn - 1] and the first part of diag[nn]
+        if (std::abs(alpha_k[nn] - alpha_k[nn - 1]) > taylor_tol_) {
+          subdiag[nn - 1] =
+              (v_points_[nn] - v_points_[nn - 1])
+              * ((std::exp(alpha_k[nn]) + std::exp(alpha_k[nn - 1])) / std::pow(alpha_k[nn] - alpha_k[nn - 1], 2)
+                 - 2. * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1]))
+                       / std::pow(alpha_k[nn] - alpha_k[nn - 1], 3));
+          diag[nn] = (v_points_[nn] - v_points_[nn - 1])
+                     * ((-2. / std::pow(alpha_k[nn] - alpha_k[nn - 1], 2) + 1. / (alpha_k[nn] - alpha_k[nn - 1]))
+                            * std::exp(alpha_k[nn])
+                        + 2. / std::pow(alpha_k[nn] - alpha_k[nn - 1], 3)
+                              * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1])));
 
-  std::unique_ptr<Localfunction> derived_local_function(const EntityType& entity) const
-  {
-    const auto& index = index_set_.index(entity);
-    return std::make_unique<Localfunction>(entity,
-                                           basis_functions_,
-                                           quad_points_,
-                                           quad_weights_,
-                                           M_,
-                                           tau_,
-                                           epsilon_gamma_,
-                                           chi_,
-                                           xi_,
-                                           r_sequence_,
-                                           k_0_,
-                                           k_max_,
-                                           epsilon_,
-                                           cache_[index],
-                                           alpha_storage_[index],
-                                           mutexes_[index]
-#    if HAVE_CLP
-                                           ,
-                                           lp_);
-#    else
-    );
-#    endif
-  }
+        } else { // alpha difference within taylor_tol_: truncated series for both entries, scaled by factor
+          RangeFieldType update = 1.;
+          RangeFieldType result = 0.;
+          RangeFieldType base = alpha_k[nn - 1] - alpha_k[nn];
+          RangeFieldType factor = (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn]);
+          size_t ll = 2;
+          auto pow_frac = 1. / 6.; // = 1 / 3!
+          while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
+            update = pow_frac * (ll - 1.); // term: (ll - 1) * base^(ll - 2) / (ll + 1)!
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 1);
+          } // ll
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          subdiag[nn - 1] += result * factor;
 
-  // calculate \sum_{i=1}^d < v_i m \psi > n_i, where n is the unit outer normal,
-  // m is the basis function vector, phi_u is the ansatz corresponding to u
-  // and x, v, t are the space, velocity and time variable, respectively
-  // As we are using cartesian grids, n_i == 0 in all but one dimension, so only evaluate for i == dd
-  StateRangeType evaluate_kinetic_flux(const EntityType& entity,
-                                       const DomainType& x_local_entity,
-                                       const StateRangeType& /*u_i*/,
-                                       const EntityType& neighbor,
-                                       const DomainType& x_local_neighbor,
-                                       const StateRangeType& u_j,
-                                       const DomainType& n_ij,
-                                       const size_t dd,
-                                       const XT::Common::Parameter& /*param*/,
-                                       const XT::Common::Parameter& param_neighbor) const
-  {
-    assert(XT::Common::FloatCmp::ne(n_ij[dd], 0.));
-    const bool boundary = static_cast<bool>(param_neighbor.get("boundary")[0]);
-    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
-    const auto local_function_entity = derived_local_function(entity);
-    const auto local_function_neighbor = derived_local_function(neighbor);
-    const auto alpha_i = local_function_entity->get_stored_alpha(x_local_entity);
-    StateRangeType alpha_j;
-    if (boundary)
-      alpha_j = local_function_neighbor->get_alpha(x_local_neighbor, u_j, param_neighbor, true)->first;
-    else
-      alpha_j = local_function_neighbor->get_stored_alpha(x_local_neighbor);
-    thread_local FieldVector<std::vector<RangeFieldType>, 2> work_vecs;
-    work_vecs[0].resize(quad_points_.size());
-    work_vecs[1].resize(quad_points_.size());
-    local_function_entity->calculate_scalar_products(alpha_i, M_, work_vecs[0]);
-    local_function_entity->calculate_scalar_products(alpha_j, M_, work_vecs[1]);
-    StateRangeType ret(0);
-    for (size_t ll = 0; ll < quad_points_.size(); ++ll) {
-      const auto position = quad_points_[ll][dd];
-      RangeFieldType factor = position * n_ij[dd] > 0. ? std::exp(work_vecs[0][ll]) : std::exp(work_vecs[1][ll]);
-      factor *= quad_weights_[ll] * position;
-      const auto* basis_ll = M_.get_ptr(ll);
-      for (size_t ii = 0; ii < dimRange; ++ii)
-        ret[ii] += basis_ll[ii] * factor;
-    } // ll
-    ret *= n_ij[dd];
-    return ret;
-  } // StateRangeType evaluate_kinetic_flux(...)
+          result = 0.; // restart the accumulators for the series of the diagonal entry
+          update = 1;
+          ll = 3;
+          pow_frac = 2. / 6.;
+          while (ll <= max_taylor_order_ && XT::Common::FloatCmp::ne(update, 0.)) {
+            update = pow_frac; // term: 2 * base^(ll - 3) / ll!
+            result += update;
+            ++ll;
+            pow_frac *= base / ll;
+          } // ll
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          diag[nn] += result * factor;
+        }
+      } // if (nn > 0)
+      if (nn < dimRange - 1) { // right neighbour: only adds to diag[nn]
+        if (std::abs(alpha_k[nn + 1] - alpha_k[nn]) > taylor_tol_) {
+          diag[nn] += (v_points_[nn + 1] - v_points_[nn])
+                      * ((-2. / std::pow(alpha_k[nn + 1] - alpha_k[nn], 2) - 1. / (alpha_k[nn + 1] - alpha_k[nn]))
+                             * std::exp(alpha_k[nn])
+                         + 2. / std::pow(alpha_k[nn + 1] - alpha_k[nn], 3)
+                               * (std::exp(alpha_k[nn + 1]) - std::exp(alpha_k[nn])));
+        } else {
+          RangeFieldType update = 1.;
+          RangeFieldType result = 0.;
+          RangeFieldType base = alpha_k[nn + 1] - alpha_k[nn];
+          size_t ll = 3;
+          auto pow_frac = 2. / 6.;
+          while (ll <= max_taylor_order_ && XT::Common::FloatCmp::ne(update, 0.)) {
+            update = pow_frac; // term: 2 * base^(ll - 3) / ll!
+            result += update;
+            ++ll;
+            pow_frac *= base / ll;
+          } // ll
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          diag[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha_k[nn]);
+        }
+      } // if (nn < dimRange - 1)
+    } // nn
+  } // void calculate_hessian(...)
+
+  void
+  calculate_J(const VectorType& alpha_k, VectorType& diag, FieldVector<RangeFieldType, dimRange - 1>& subdiag) const
+  {
+    std::fill(diag.begin(), diag.end(), 0.); // J is returned through its diagonal and subdiagonal only
+    std::fill(subdiag.begin(), subdiag.end(), 0.);
+    for (size_t nn = 0; nn < dimRange; ++nn) {
+      if (nn > 0) { // left neighbour: sets subdiag[nn - 1] and the first part of diag[nn]
+        if (std::abs(alpha_k[nn] - alpha_k[nn - 1]) > taylor_tol_) {
+          subdiag[nn - 1] =
+              (v_points_[nn] - v_points_[nn - 1])
+                  * ((v_points_[nn] * std::exp(alpha_k[nn]) + v_points_[nn - 1] * std::exp(alpha_k[nn - 1]))
+                         / std::pow(alpha_k[nn - 1] - alpha_k[nn], 2)
+                     + 2.
+                           * ((2 * v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn])
+                              - (2 * v_points_[nn - 1] - v_points_[nn]) * std::exp(alpha_k[nn - 1]))
+                           / std::pow(alpha_k[nn - 1] - alpha_k[nn], 3))
+              + 6. * std::pow(v_points_[nn] - v_points_[nn - 1], 2)
+                    * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1])) / std::pow(alpha_k[nn - 1] - alpha_k[nn], 4);
+          diag[nn] = 6 * std::pow(v_points_[nn - 1] - v_points_[nn], 2)
+                         * (std::exp(alpha_k[nn - 1]) - std::exp(alpha_k[nn]))
+                         / std::pow(alpha_k[nn - 1] - alpha_k[nn], 4)
+                     + 2. * (v_points_[nn] - v_points_[nn - 1])
+                           * (v_points_[nn - 1] * std::exp(alpha_k[nn - 1])
+                              - (3 * v_points_[nn] - 2 * v_points_[nn - 1]) * std::exp(alpha_k[nn]))
+                           / std::pow(alpha_k[nn - 1] - alpha_k[nn], 3)
+                     - v_points_[nn] * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn])
+                           / (alpha_k[nn - 1] - alpha_k[nn])
+                     - (std::pow(v_points_[nn - 1], 2) - 4 * v_points_[nn] * v_points_[nn - 1]
+                        + 3. * std::pow(v_points_[nn], 2))
+                           * std::exp(alpha_k[nn]) / std::pow(alpha_k[nn - 1] - alpha_k[nn], 2);
+        } else { // alpha difference within taylor_tol_: truncated series for both entries, scaled by factor
+          RangeFieldType update = 1.;
+          RangeFieldType result = 0.;
+          RangeFieldType base = alpha_k[nn - 1] - alpha_k[nn];
+          RangeFieldType factor = (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn]);
+          size_t ll = 0; // 'll < 2' below forces at least two terms, presumably because a term can be exactly zero
+          auto pow_frac = 1. / 24.; // = 1 / 4!
+          while (ll < 2 || (ll <= max_taylor_order_ - 4 && XT::Common::FloatCmp::ne(update, 0.))) {
+            update = pow_frac * ((ll * ll + 3 * ll + 2) * v_points_[nn - 1] + (2 * ll + 2) * v_points_[nn]);
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 4);
+          } // ll
+          subdiag[nn - 1] += result * factor;
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac))); // NOTE(review): runs after result was used above
+
+          result = 0.; // restart the accumulators for the series of the diagonal entry
+          update = 1;
+          ll = 0; // 'll < 4' below forces at least four terms
+          pow_frac = 1. / 24.;
+          while (ll < 4 || (ll <= max_taylor_order_ - 4 && XT::Common::FloatCmp::ne(update, 0.))) {
+            update = pow_frac * (6 * v_points_[nn] + (2 * ll + 2) * v_points_[nn - 1]);
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 4);
+          } // ll
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          diag[nn] += result * factor;
+        }
+      } // if (nn > 0)
+      if (nn < dimRange - 1) { // right neighbour: only adds to diag[nn]
+        if (std::abs(alpha_k[nn + 1] - alpha_k[nn]) > taylor_tol_) {
+          diag[nn] += 6 * std::pow(v_points_[nn] - v_points_[nn + 1], 2)
+                          * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn + 1]))
+                          / std::pow(alpha_k[nn] - alpha_k[nn + 1], 4)
+                      + 2. * (v_points_[nn] - v_points_[nn + 1])
+                            * (v_points_[nn + 1] * std::exp(alpha_k[nn + 1])
+                               - (3 * v_points_[nn] - 2 * v_points_[nn + 1]) * std::exp(alpha_k[nn]))
+                            / std::pow(alpha_k[nn] - alpha_k[nn + 1], 3)
+                      - v_points_[nn] * (v_points_[nn] - v_points_[nn + 1]) * std::exp(alpha_k[nn])
+                            / (alpha_k[nn] - alpha_k[nn + 1])
+                      + (std::pow(v_points_[nn + 1], 2) - 4 * v_points_[nn] * v_points_[nn + 1]
+                         + 3. * std::pow(v_points_[nn], 2))
+                            * std::exp(alpha_k[nn]) / std::pow(alpha_k[nn] - alpha_k[nn + 1], 2);
+        } else {
+          RangeFieldType update = 1.;
+          RangeFieldType result = 0.;
+          RangeFieldType base = alpha_k[nn + 1] - alpha_k[nn];
+          size_t ll = 0; // 'll < 4' below forces at least four terms
+          auto pow_frac = 1. / 24.; // = 1 / 4!
+          while (ll < 4 || (ll <= max_taylor_order_ - 4 && XT::Common::FloatCmp::ne(update, 0.))) {
+            update = pow_frac * (6 * v_points_[nn] + (2 * ll + 2) * v_points_[nn + 1]);
+            result += update;
+            ++ll;
+            pow_frac *= base / (ll + 4);
+          } // ll
+          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+          diag[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha_k[nn]);
+        }
+      } // if (nn < dimRange - 1)
+    } // nn
+  } // void calculate_J(...)
 
-  const BasisfunctionType& basis_functions() const
+  // Calculates ret = J H^{-1} (dense). J and H are symmetric tridiagonal, H is positive definite and is overwritten.
+  static void calculate_J_Hinv(MatrixType& ret,
+                               const VectorType& J_diag,
+                               const FieldVector<RangeFieldType, dimRange - 1>& J_subdiag,
+                               VectorType& H_diag,
+                               FieldVector<RangeFieldType, dimRange - 1>& H_subdiag)
   {
-    return basis_functions_;
-  }
+    // factorize H = LDL^T, where L is unit lower bidiagonal and D is diagonal
+    // H_diag is overwritten by the diagonal elements of D
+    // H_subdiag is overwritten by the subdiagonal elements of L
+    XT::LA::tridiagonal_ldlt(H_diag, H_subdiag);
 
-private:
-  const typename GridLayerType::IndexSet& index_set_;
-  const BasisfunctionType& basis_functions_;
-  std::vector<DomainType> quad_points_;
-  std::vector<RangeFieldType> quad_weights_;
-  BasisValuesMatrixType M_;
+    // copy the tridiagonal J into the dense matrix ret
+    std::fill(ret.begin(), ret.end(), 0.);
+    for (size_t ii = 0; ii < dimRange - 1; ++ii) {
+      ret[ii][ii] = J_diag[ii];
+      ret[ii + 1][ii] = J_subdiag[ii];
+      ret[ii][ii + 1] = J_subdiag[ii];
+    }
+    ret[dimRange - 1][dimRange - 1] = J_diag[dimRange - 1]; // last diagonal entry is not reached by the loop above
+
+    // Solve ret H = J, which (as H and J are symmetric) is equivalent to H ret^T = J.
+    XT::LA::solve_tridiagonal_ldlt_factorized(H_diag, H_subdiag, ret);
+    // ret now holds ret^T = H^{-1} J (assuming the solve above works in place on ret), so transpose it in place
+    for (size_t ii = 0; ii < dimRange; ++ii)
+      for (size_t jj = 0; jj < ii; ++jj)
+        std::swap(ret[jj][ii], ret[ii][jj]);
+  } // void calculate_J_Hinv(...)
+
+  const MomentBasis& basis_functions_;
+  const std::vector<RangeFieldType>& v_points_;
   const RangeFieldType tau_;
   const RangeFieldType epsilon_gamma_;
   const RangeFieldType chi_;
@@ -3249,74 +3023,47 @@ private:
   const size_t k_0_;
   const size_t k_max_;
   const RangeFieldType epsilon_;
-  const std::string name_;
-  // Use unique_ptr in the vectors to avoid the memory cost for storing twice as many matrices or vectors as needed
-  // (see
-  // constructor)
-  mutable std::vector<LocalCacheType> cache_;
-  mutable std::vector<AlphaStorageType> alpha_storage_;
-  mutable std::vector<std::mutex> mutexes_;
-#    if HAVE_CLP
-  mutable XT::Common::PerThreadValue<std::unique_ptr<ClpSimplex>> lp_;
-#    endif
+  const RangeFieldType taylor_tol_;
+  const size_t max_taylor_order_;
 };
-#  endif
-
+#endif
 
-#  if 0
+#if 1
 /**
- * Specialization of EntropyBasedLocalFlux for 1D Hatfunctions (no change of basis, analytic integrals + Taylor)
+ * Specialization of EntropyBasedFluxImplementation for 1D hat functions (no change of basis, uses the basis structure)
  */
-template <class GridLayerImp, class U>
-class EntropyBasedLocalFlux<
-    HatFunctionMomentBasis<typename U::DomainFieldType, 1, typename U::RangeFieldType, U::dimRange, 1, 1>,
-    GridLayerImp,
-    U>
-  : public XT::Functions::LocalizableFluxFunctionInterface<typename GridLayerImp::template Codim<0>::Entity,
-                                                           typename U::DomainFieldType,
-                                                           GridLayerImp::dimension,
-                                                           U,
-                                                           0,
-                                                           typename U::RangeFieldType,
-                                                           U::dimRange,
-                                                           1>
+template <class D, class R, size_t dimRange>
+class EntropyBasedFluxImplementation<HatFunctionMomentBasis<D, 1, R, dimRange, 1>>
+  : public XT::Functions::FunctionInterface<dimRange, 1, dimRange, R>
 {
-  using BaseType =
-      typename XT::Functions::LocalizableFluxFunctionInterface<typename GridLayerImp::template Codim<0>::Entity,
-                                                               typename U::DomainFieldType,
-                                                               GridLayerImp::dimension,
-                                                               U,
-                                                               0,
-                                                               typename U::RangeFieldType,
-                                                               U::dimRange,
-                                                               1>;
-  using ThisType = EntropyBasedLocalFlux;
+  using BaseType = typename XT::Functions::FunctionInterface<dimRange, 1, dimRange, R>;
+  using ThisType = EntropyBasedFluxImplementation;
 
 public:
-  using BaseType::dimDomain;
-  using BaseType::dimRange;
-  using BaseType::dimRangeCols;
+  using MomentBasis = HatFunctionMomentBasis<D, 1, R, dimRange, 1>;
+  using BaseType::d;
+  using BaseType::r;
+  static const size_t basis_dimDomain = MomentBasis::dimDomain;
+  static const size_t basis_dimRange = dimRange;
+  using typename BaseType::DerivativeRangeReturnType;
   using typename BaseType::DomainFieldType;
   using typename BaseType::DomainType;
-  using typename BaseType::EntityType;
-  using typename BaseType::LocalfunctionType;
-  using typename BaseType::PartialURangeType;
   using typename BaseType::RangeFieldType;
-  using typename BaseType::RangeType;
-  using typename BaseType::StateRangeType;
-  using typename BaseType::StateType;
-  using BasisfunctionType = HatFunctionMomentBasis<DomainFieldType, 1, RangeFieldType, dimRange, 1, 1>;
-  using GridLayerType = GridLayerImp;
-  using QuadratureRuleType = Dune::QuadratureRule<DomainFieldType, 1>;
-  using MatrixType = FieldMatrix<RangeFieldType, dimRange, dimRange>;
-  using AlphaReturnType = typename std::pair<StateRangeType, RangeFieldType>;
-  using LocalCacheType = EntropyLocalCache<StateRangeType, StateRangeType>;
-  using AlphaStorageType = std::map<DomainType, StateRangeType, XT::Common::VectorFloatLess>;
-  static const size_t cache_size = 4 * dimDomain + 2;
-
-  explicit EntropyBasedLocalFlux(
-      const BasisfunctionType& basis_functions,
-      const GridLayerType& grid_layer,
+  using typename BaseType::RangeReturnType;
+  using typename BaseType::RowDerivativeRangeReturnType;
+  using BasisDomainType = typename MomentBasis::DomainType;
+  using MatrixType = XT::Common::FieldMatrix<RangeFieldType, basis_dimRange, basis_dimRange>;
+  using VectorType = XT::Common::FieldVector<RangeFieldType, basis_dimRange>;
+  using AlphaReturnType = std::pair<VectorType, std::pair<DomainType, RangeFieldType>>;
+  static const size_t num_intervals = dimRange - 1;
+  static const size_t block_size = 2;
+  using LocalVectorType = XT::Common::FieldVector<RangeFieldType, block_size>;
+  using BasisValuesMatrixType = FieldVector<std::vector<LocalVectorType>, num_intervals>;
+  using QuadraturePointsType = FieldVector<std::vector<RangeFieldType>, num_intervals>;
+  using QuadratureWeightsType = FieldVector<std::vector<RangeFieldType>, num_intervals>;
+
+  explicit EntropyBasedFluxImplementation(
+      const MomentBasis& basis_functions,
       const RangeFieldType tau = 1e-9,
       const RangeFieldType epsilon_gamma = 0.01,
       const RangeFieldType chi = 0.5,
@@ -3324,13 +3071,9 @@ public:
       const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
       const size_t k_0 = 500,
       const size_t k_max = 1000,
-      const RangeFieldType epsilon = std::pow(2, -52),
-      const RangeFieldType taylor_tol = 0.1,
-      const size_t max_taylor_order = 200,
-      const std::string name = static_id())
-    : index_set_(grid_layer.indexSet())
-    , basis_functions_(basis_functions)
-    , v_points_(basis_functions_.triangulation())
+      const RangeFieldType epsilon = std::pow(2, -52))
+    : basis_functions_(basis_functions)
+    , grid_points_(basis_functions_.triangulation())
     , tau_(tau)
     , epsilon_gamma_(epsilon_gamma)
     , chi_(chi)
@@ -3339,851 +3082,365 @@ public:
     , k_0_(k_0)
     , k_max_(k_max)
     , epsilon_(epsilon)
-    , taylor_tol_(taylor_tol)
-    , max_taylor_order_(max_taylor_order)
-    , name_(name)
-    , cache_(index_set_.size(0), LocalCacheType(cache_size))
-    , alpha_storage_(index_set_.size(0))
-    , mutexes_(index_set_.size(0))
-  {}
-
-  class Localfunction : public LocalfunctionType
   {
-  public:
-    using LocalfunctionType::dimDomain;
-    using typename LocalfunctionType::ColPartialURangeType;
-    using typename LocalfunctionType::ColRangeType;
-
-    Localfunction(const EntityType& e,
-                  const BasisfunctionType& basis_functions,
-                  const std::vector<RangeFieldType>& v_points,
-                  const RangeFieldType tau,
-                  const RangeFieldType epsilon_gamma,
-                  const RangeFieldType chi,
-                  const RangeFieldType xi,
-                  const std::vector<RangeFieldType>& r_sequence,
-                  const size_t k_0,
-                  const size_t k_max,
-                  const RangeFieldType epsilon,
-                  const RangeFieldType taylor_tol,
-                  const size_t max_taylor_order,
-                  LocalCacheType& cache,
-                  AlphaStorageType& alpha_storage,
-                  std::mutex& mutex)
-      : LocalfunctionType(e)
-      , basis_functions_(basis_functions)
-      , v_points_(v_points)
-      , tau_(tau)
-      , epsilon_gamma_(epsilon_gamma)
-      , chi_(chi)
-      , xi_(xi)
-      , r_sequence_(r_sequence)
-      , k_0_(k_0)
-      , k_max_(k_max)
-      , epsilon_(epsilon)
-      , taylor_tol_(taylor_tol)
-      , max_taylor_order_(max_taylor_order)
-      , cache_(cache)
-      , alpha_storage_(alpha_storage)
-      , mutex_(mutex)
-    {}
-
-    using LocalfunctionType::entity;
-
-    static bool is_realizable(const RangeType& u)
-    {
-      for (const auto& u_i : u)
-        if (!(u_i > 0.) || std::isinf(u_i))
-          return false;
-      return true;
-    }
-
-    void store_alpha(const DomainType& x_local, const StateRangeType& alpha)
-    {
-      alpha_storage_[x_local] = alpha;
-    }
-
-    StateRangeType get_stored_alpha(const DomainType& x_local) const
-    {
-      return alpha_storage_.at(x_local);
-    }
-
-    template <class GridLayerType>
-    void center_results_to_intersections(const GridLayerType& grid_layer)
-    {
-      const auto center = entity().geometry().local(entity().geometry().center());
-      const auto center_alpha = get_stored_alpha(center);
-      for (const auto& intersection : Dune::intersections(grid_layer, entity()))
-        store_alpha(entity().geometry().local(intersection.geometry().center()), center_alpha);
-    }
-
-    std::unique_ptr<AlphaReturnType> get_alpha(const DomainType& x_local,
-                                               const StateRangeType& u,
-                                               const XT::Common::Parameter& param,
-                                               const bool regularize) const
-    {
-      const bool boundary = bool(param.get("boundary")[0]);
-      auto ret = std::make_unique<AlphaReturnType>();
-      mutex_.lock();
-      if (boundary)
-        cache_.set_capacity(cache_size + dimDomain);
-
-      // rescale u such that the density <psi> is 1
-      RangeFieldType density = basis_functions_.density(u);
-      if (!(density > 0.) || std::isinf(density)) {
-        mutex_.unlock();
-        DUNE_THROW(Dune::MathError, "Negative density!");
+    const auto& quadratures = basis_functions_.quadratures();
+    assert(quadratures.size() == grid_points_.size() - 1);
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      for (const auto& quad_point : quadratures[jj]) {
+        quad_points_[jj].emplace_back(quad_point.position()[0]);
+        quad_weights_[jj].emplace_back(quad_point.weight());
       }
-      RangeType u_prime = u / density;
-      RangeType alpha_iso = basis_functions_.alpha_iso();
-
-      // if value has already been calculated for these values, skip computation
-      const auto cache_iterator = cache_.find_closest(u_prime);
-      if (cache_iterator != cache_.end() && XT::Common::FloatCmp::eq(cache_iterator->first, u_prime, 1e-14, 1e-14)) {
-        const auto alpha_prime = cache_iterator->second;
-        ret->first = alpha_prime + alpha_iso * std::log(density);
-        ret->second = 0.;
-        alpha_storage_[x_local] = ret->first;
-        mutex_.unlock();
-        return ret;
-      } else {
-        RangeFieldType tau_prime = std::min(
-            tau_ / ((1 + std::sqrt(dimRange) * u_prime.two_norm()) * density + std::sqrt(dimRange) * tau_), tau_);
-        // The hessian H is always symmetric and tridiagonal, so we only need to store the diagonal and subdiagonal
-        // elements
-        RangeType H_diag;
-        FieldVector<RangeFieldType, dimRange - 1> H_subdiag;
-
-        // calculate moment vector for isotropic distribution
-        RangeType u_iso = basis_functions_.u_iso();
-        RangeType v;
-        RangeType alpha_k = cache_iterator != cache_.end() ? cache_iterator->second : alpha_iso;
-        const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
-        const auto r_max = r_sequence.back();
-        for (const auto& r : r_sequence_) {
-          // regularize u
-          v = u_prime;
-          if (r > 0) {
-            alpha_k = alpha_iso;
-            RangeType r_times_u_iso(u_iso);
-            r_times_u_iso *= r;
-            v *= 1 - r;
-            v += r_times_u_iso;
-          }
-
-          // calculate f_0
-          RangeFieldType f_k = calculate_f(alpha_k, v);
-
-          int pure_newton = 0;
-          for (size_t kk = 0; kk < k_max_; ++kk) {
-            // exit inner for loop to increase r if too many iterations are used
-            if (kk > k_0_ && r < r_max)
-              break;
-            // calculate gradient g
-            RangeType g_k = calculate_gradient(alpha_k, v);
-            // calculate Hessian H
-            calculate_hessian(alpha_k, H_diag, H_subdiag);
-            // calculate descent direction d_k;
-            RangeType d_k(0), minus_g_k(g_k);
-            minus_g_k *= -1;
-            try {
-              d_k = minus_g_k;
-              XT::LA::solve_sym_tridiag_posdef(H_diag, H_subdiag, d_k);
-            } catch (const Dune::MathError&) {
-              if (r < r_max) {
-                break;
-              } else {
-                mutex_.unlock();
-                //                std::cerr << "Failed to converge for " << XT::Common::to_string(u, 15) << " with
-                //                density "
-                //                          << XT::Common::to_string(density, 15) << " at position "
-                //                          << XT::Common::to_string(entity().geometry().center(), 15)
-                //                          << " due to errors in the Cholesky decomposition!" << std::endl;
-                DUNE_THROW(Dune::MathError, "Failure to converge!");
-              }
-            }
-
-            const auto& alpha_tilde = alpha_k;
-            const auto u_alpha_tilde = g_k + v;
-            auto density_tilde = basis_functions_.density(u_alpha_tilde);
-            if (!(density_tilde > 0.) || std::isinf(density_tilde))
-              break;
-            const auto alpha_prime = alpha_tilde - alpha_iso * std::log(density_tilde);
-            const auto u_alpha_prime = calculate_u(alpha_prime);
-            auto u_eps_diff = v - u_alpha_prime * (1 - epsilon_gamma_);
-            // checking realizability is cheap so we do not need the second stopping criterion
-            if (g_k.two_norm() < tau_prime && is_realizable(u_eps_diff)) {
-              ret->first = alpha_prime + alpha_iso * std::log(density);
-              ret->second = r;
-              cache_.insert(v, alpha_prime);
-              alpha_storage_[x_local] = ret->first;
-              goto outside_all_loops;
-            } else {
-              RangeFieldType zeta_k = 1;
-              // backtracking line search
-              while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm()) {
-                // while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.) {
-                // calculate alpha_new = alpha_k + zeta_k d_k
-                auto alpha_new = d_k;
-                alpha_new *= zeta_k;
-                alpha_new += alpha_k;
-                // calculate f(alpha_new)
-                RangeFieldType f_new = calculate_f(alpha_new, v);
-                if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
-                  alpha_k = alpha_new;
-                  f_k = f_new;
-                  pure_newton = 0.;
-                  break;
-                }
-                zeta_k = chi_ * zeta_k;
-              } // backtracking linesearch while
-              // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
-              if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm())
-                ++pure_newton;
-            } // else (stopping conditions)
-          } // k loop (Newton iterations)
-        } // r loop (Regularization parameter)
-        mutex_.unlock();
-        //        std::cerr << "Failed to converge for " << XT::Common::to_string(u, 15) << " with density "
-        //                  << XT::Common::to_string(density, 15) << " at position "
-        //                  << XT::Common::to_string(entity().geometry().center(), 15) << " due to too many iterations!"
-        //                  << std::endl;
-        DUNE_THROW(MathError, "Failed to converge");
-      } // else ( value has not been calculated before )
-
-    outside_all_loops:
-      mutex_.unlock();
-      return ret;
-    } // ... get_alpha(...)
-
-    virtual size_t order(const XT::Common::Parameter& /*param*/) const override
-    {
-      return 1;
-    }
-
-    virtual void evaluate(const DomainType& x_local,
-                          const StateRangeType& u,
-                          RangeType& ret,
-                          const XT::Common::Parameter& param) const override
-    {
-      const auto alpha = get_alpha(x_local, u, param, true)->first;
-
-      std::fill(ret.begin(), ret.end(), 0.);
-      // calculate < \mu m G_\alpha(u) >
-      for (size_t nn = 0; nn < dimRange; ++nn) {
-        if (nn > 0) {
-          if (std::abs(alpha[nn] - alpha[nn - 1]) > taylor_tol_) {
-            ret[nn] += 2. * std::pow(v_points_[nn] - v_points_[nn - 1], 2) / std::pow(alpha[nn] - alpha[nn - 1], 3)
-                           * (std::exp(alpha[nn]) - std::exp(alpha[nn - 1]))
-                       + (v_points_[nn] - v_points_[nn - 1]) / std::pow(alpha[nn] - alpha[nn - 1], 2)
-                             * (v_points_[nn - 1] * (std::exp(alpha[nn]) + std::exp(alpha[nn - 1]))
-                                - 2 * v_points_[nn] * std::exp(alpha[nn]))
-                       + v_points_[nn] * (v_points_[nn] - v_points_[nn - 1]) / (alpha[nn] - alpha[nn - 1])
-                             * std::exp(alpha[nn]);
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha[nn] - alpha[nn - 1];
-            size_t ll = 0;
-            auto pow_frac = 1. / 6.;
-            while (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac * ((ll * ll + 3 * ll + 2) * v_points_[nn] + (ll + 1) * v_points_[nn - 1]);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 3);
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            ret[nn] += result * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha[nn - 1]);
-          }
-        }
-        if (nn < dimRange - 1) {
-          if (std::abs(alpha[nn + 1] - alpha[nn]) > taylor_tol_) {
-            ret[nn] += -2. * std::pow(v_points_[nn + 1] - v_points_[nn], 2) / std::pow(alpha[nn + 1] - alpha[nn], 3)
-                           * (std::exp(alpha[nn + 1]) - std::exp(alpha[nn]))
-                       + (v_points_[nn + 1] - v_points_[nn]) / std::pow(alpha[nn + 1] - alpha[nn], 2)
-                             * (v_points_[nn + 1] * (std::exp(alpha[nn + 1]) + std::exp(alpha[nn]))
-                                - 2 * v_points_[nn] * std::exp(alpha[nn]))
-                       - v_points_[nn] * (v_points_[nn + 1] - v_points_[nn]) / (alpha[nn + 1] - alpha[nn])
-                             * std::exp(alpha[nn]);
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha[nn + 1] - alpha[nn];
-            size_t ll = 0;
-            auto pow_frac = 1. / 6.;
-            while (ll < 3 || (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(update, 0.))) {
-              update = pow_frac * (2 * v_points_[nn] + (ll + 1) * v_points_[nn + 1]);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 3);
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            ret[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha[nn]);
-          }
-        } // if (nn < dimRange - 1)
-      } // nn
-    } // void evaluate(...)
-
-    virtual void evaluate_col(const size_t DXTC_DEBUG_ONLY(col),
-                              const DomainType& x_local,
-                              const StateRangeType& u,
-                              ColRangeType& ret,
-                              const XT::Common::Parameter& param) const override
-    {
-      assert(col == 0);
-      evaluate(x_local, u, ret, param);
-    } // void evaluate_col(...)
-
-    virtual void partial_u(const DomainType& x_local,
-                           const StateRangeType& /*u*/,
-                           PartialURangeType& ret,
-                           const XT::Common::Parameter& /*param*/) const override
-    {
-      const auto alpha = get_stored_alpha(x_local);
-      RangeType H_diag, J_diag;
-      FieldVector<RangeFieldType, dimRange - 1> H_subdiag, J_subdiag;
-      calculate_hessian(alpha, H_diag, H_subdiag);
-      calculate_J(alpha, J_diag, J_subdiag);
-      calculate_J_Hinv(ret, J_diag, J_subdiag, H_diag, H_subdiag);
-    }
-
-    virtual void partial_u_col(const size_t DXTC_DEBUG_ONLY(col),
-                               const DomainType& x_local,
-                               const StateRangeType& u,
-                               ColPartialURangeType& ret,
-                               const XT::Common::Parameter& param) const override
-    {
-      assert(col == 0);
-      partial_u(x_local, u, ret, param);
-    }
-
-    static std::string static_id()
-    {
-      return "gdt.entropybasedlocalflux";
-    }
-
-  private:
-    RangeFieldType calculate_f(const RangeType& alpha_k, const RangeType& v) const
-    {
-      RangeFieldType ret(0);
-      for (size_t ii = 0; ii < dimRange - 1; ++ii) {
-        if (std::abs(alpha_k[ii + 1] - alpha_k[ii]) > taylor_tol_) {
-          ret += (v_points_[ii + 1] - v_points_[ii]) / (alpha_k[ii + 1] - alpha_k[ii])
-                 * (std::exp(alpha_k[ii + 1]) - std::exp(alpha_k[ii]));
-        } else {
-          RangeFieldType update = 1.;
-          RangeFieldType result = 0.;
-          size_t ll = 1;
-          RangeFieldType base = alpha_k[ii + 1] - alpha_k[ii];
-          auto pow_frac = 1.;
-          while (ll <= max_taylor_order_ && XT::Common::FloatCmp::ne(update, 0.)) {
-            update = pow_frac;
-            result += update;
-            ++ll;
-            pow_frac *= base / ll;
-          }
-          assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-          ret += result * (v_points_[ii + 1] - v_points_[ii]) * std::exp(alpha_k[ii]);
-        }
-      } // ii
-      ret -= alpha_k * v;
-      return ret;
-    } // .. calculate_f(...)
-
-    RangeType calculate_u(const RangeType& alpha_k) const
-    {
-      RangeType u(0);
-      for (size_t nn = 0; nn < dimRange; ++nn) {
-        if (nn > 0) {
-          if (std::abs(alpha_k[nn] - alpha_k[nn - 1]) > taylor_tol_) {
-            u[nn] += -(v_points_[nn] - v_points_[nn - 1]) / std::pow(alpha_k[nn] - alpha_k[nn - 1], 2)
-                         * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1]))
-                     + (v_points_[nn] - v_points_[nn - 1]) / (alpha_k[nn] - alpha_k[nn - 1]) * std::exp(alpha_k[nn]);
-          } else {
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_k[nn - 1] - alpha_k[nn];
-            size_t ll = 0;
-            RangeFieldType update = 1;
-            RangeFieldType pow_frac = 0.5;
-            while (ll <= max_taylor_order_ - 2 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac;
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 2);
-            }
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            u[nn] += result * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn]);
-          }
-        } // if (nn > 0)
-        if (nn < dimRange - 1) {
-          if (std::abs(alpha_k[nn + 1] - alpha_k[nn]) > taylor_tol_) {
-            u[nn] += (v_points_[nn + 1] - v_points_[nn]) / std::pow(alpha_k[nn + 1] - alpha_k[nn], 2)
-                         * (std::exp(alpha_k[nn + 1]) - std::exp(alpha_k[nn]))
-                     - (v_points_[nn + 1] - v_points_[nn]) / (alpha_k[nn + 1] - alpha_k[nn]) * std::exp(alpha_k[nn]);
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            size_t ll = 0;
-            RangeFieldType base = alpha_k[nn + 1] - alpha_k[nn];
-            auto pow_frac = 0.5;
-            while (ll <= max_taylor_order_ - 2 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac;
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 2);
-            }
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            u[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha_k[nn]);
-          }
-        } // if (nn < dimRange-1)
-      } // nn
-      return u;
-    } // RangeType calculate_u(...)
-
-    RangeType calculate_gradient(const RangeType& alpha_k, const RangeType& v) const
-    {
-      return calculate_u(alpha_k) - v;
-    }
-
-    void calculate_hessian(const RangeType& alpha_k,
-                           RangeType& diag,
-                           FieldVector<RangeFieldType, dimRange - 1>& subdiag) const
-    {
-      std::fill(diag.begin(), diag.end(), 0.);
-      std::fill(subdiag.begin(), subdiag.end(), 0.);
-      for (size_t nn = 0; nn < dimRange; ++nn) {
-        if (nn > 0) {
-          if (std::abs(alpha_k[nn] - alpha_k[nn - 1]) > taylor_tol_) {
-            subdiag[nn - 1] =
-                (v_points_[nn] - v_points_[nn - 1])
-                * ((std::exp(alpha_k[nn]) + std::exp(alpha_k[nn - 1])) / std::pow(alpha_k[nn] - alpha_k[nn - 1], 2)
-                   - 2. * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1]))
-                         / std::pow(alpha_k[nn] - alpha_k[nn - 1], 3));
-            diag[nn] = (v_points_[nn] - v_points_[nn - 1])
-                       * ((-2. / std::pow(alpha_k[nn] - alpha_k[nn - 1], 2) + 1. / (alpha_k[nn] - alpha_k[nn - 1]))
-                              * std::exp(alpha_k[nn])
-                          + 2. / std::pow(alpha_k[nn] - alpha_k[nn - 1], 3)
-                                * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1])));
+    } // jj
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      M_[jj].resize(quad_points_[jj].size());
+      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll)
+        M_[jj][ll] = basis_functions_.evaluate_on_interval(quad_points_[jj][ll], jj);
+    } // jj
+  }
 
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_k[nn - 1] - alpha_k[nn];
-            RangeFieldType factor = (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn]);
-            size_t ll = 2;
-            auto pow_frac = 1. / 6.;
-            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac * (ll - 1.);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 1);
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            subdiag[nn - 1] += result * factor;
-
-            result = 0.;
-            update = 1;
-            ll = 3;
-            pow_frac = 2. / 6.;
-            while (ll <= max_taylor_order_ && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac;
-              result += update;
-              ++ll;
-              pow_frac *= base / ll;
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            diag[nn] += result * factor;
-          }
-        } // if (nn > 0)
-        if (nn < dimRange - 1) {
-          if (std::abs(alpha_k[nn + 1] - alpha_k[nn]) > taylor_tol_) {
-            diag[nn] += (v_points_[nn + 1] - v_points_[nn])
-                        * ((-2. / std::pow(alpha_k[nn + 1] - alpha_k[nn], 2) - 1. / (alpha_k[nn + 1] - alpha_k[nn]))
-                               * std::exp(alpha_k[nn])
-                           + 2. / std::pow(alpha_k[nn + 1] - alpha_k[nn], 3)
-                                 * (std::exp(alpha_k[nn + 1]) - std::exp(alpha_k[nn])));
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_k[nn + 1] - alpha_k[nn];
-            size_t ll = 3;
-            auto pow_frac = 2. / 6.;
-            while (ll <= max_taylor_order_ && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac;
-              result += update;
-              ++ll;
-              pow_frac *= base / ll;
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            diag[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha_k[nn]);
-          }
-        } // if (nn < dimRange - 1)
-      } // nn
-    } // void calculate_hessian(...)
+  virtual int order(const XT::Common::Parameter& /*param*/) const override
+  { // Returns the order reported for this flux; the parameter is not used.
+    return 1; // constant value for every call
+  }
 
-    void
-    calculate_J(const RangeType& alpha_k, RangeType& diag, FieldVector<RangeFieldType, dimRange - 1>& subdiag) const
-    {
-      std::fill(diag.begin(), diag.end(), 0.);
-      std::fill(subdiag.begin(), subdiag.end(), 0.);
-      for (size_t nn = 0; nn < dimRange; ++nn) {
-        if (nn > 0) {
-          if (std::abs(alpha_k[nn] - alpha_k[nn - 1]) > taylor_tol_) {
-            subdiag[nn - 1] =
-                (v_points_[nn] - v_points_[nn - 1])
-                    * ((v_points_[nn] * std::exp(alpha_k[nn]) + v_points_[nn - 1] * std::exp(alpha_k[nn - 1]))
-                           / std::pow(alpha_k[nn - 1] - alpha_k[nn], 2)
-                       + 2.
-                             * ((2 * v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn])
-                                - (2 * v_points_[nn - 1] - v_points_[nn]) * std::exp(alpha_k[nn - 1]))
-                             / std::pow(alpha_k[nn - 1] - alpha_k[nn], 3))
-                + 6. * std::pow(v_points_[nn] - v_points_[nn - 1], 2)
-                      * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn - 1]))
-                      / std::pow(alpha_k[nn - 1] - alpha_k[nn], 4);
-            diag[nn] = 6 * std::pow(v_points_[nn - 1] - v_points_[nn], 2)
-                           * (std::exp(alpha_k[nn - 1]) - std::exp(alpha_k[nn]))
-                           / std::pow(alpha_k[nn - 1] - alpha_k[nn], 4)
-                       + 2. * (v_points_[nn] - v_points_[nn - 1])
-                             * (v_points_[nn - 1] * std::exp(alpha_k[nn - 1])
-                                - (3 * v_points_[nn] - 2 * v_points_[nn - 1]) * std::exp(alpha_k[nn]))
-                             / std::pow(alpha_k[nn - 1] - alpha_k[nn], 3)
-                       - v_points_[nn] * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn])
-                             / (alpha_k[nn - 1] - alpha_k[nn])
-                       - (std::pow(v_points_[nn - 1], 2) - 4 * v_points_[nn] * v_points_[nn - 1]
-                          + 3. * std::pow(v_points_[nn], 2))
-                             * std::exp(alpha_k[nn]) / std::pow(alpha_k[nn - 1] - alpha_k[nn], 2);
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_k[nn - 1] - alpha_k[nn];
-            RangeFieldType factor = (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha_k[nn]);
-            size_t ll = 0;
-            auto pow_frac = 1. / 24.;
-            while (ll < 2 || (ll <= max_taylor_order_ - 4 && XT::Common::FloatCmp::ne(update, 0.))) {
-              update = pow_frac * ((ll * ll + 3 * ll + 2) * v_points_[nn - 1] + (2 * ll + 2) * v_points_[nn]);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 4);
-            } // ll
-            subdiag[nn - 1] += result * factor;
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
+  VectorType get_isotropic_alpha(const DomainType& u) const
+  { // Returns alpha_iso + alpha_iso_prime * log(density(u)); callers in this class pass it on to get_alpha.
+    static const auto alpha_iso = basis_functions_.alpha_iso(); // NOTE(review): function-local static, set once
+    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime(); // on first call, then reused by all calls
+    return alpha_iso + alpha_iso_prime * std::log(basis_functions_.density(u));
+  }
 
-            result = 0.;
-            update = 1;
-            ll = 0;
-            pow_frac = 1. / 24.;
-            while (ll < 4 || (ll <= max_taylor_order_ - 4 && XT::Common::FloatCmp::ne(update, 0.))) {
-              update = pow_frac * (6 * v_points_[nn] + (2 * ll + 2) * v_points_[nn - 1]);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 4);
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            diag[nn] += result * factor;
-          }
-        } // if (nn > 0)
-        if (nn < dimRange - 1) {
-          if (std::abs(alpha_k[nn + 1] - alpha_k[nn]) > taylor_tol_) {
-            diag[nn] += 6 * std::pow(v_points_[nn] - v_points_[nn + 1], 2)
-                            * (std::exp(alpha_k[nn]) - std::exp(alpha_k[nn + 1]))
-                            / std::pow(alpha_k[nn] - alpha_k[nn + 1], 4)
-                        + 2. * (v_points_[nn] - v_points_[nn + 1])
-                              * (v_points_[nn + 1] * std::exp(alpha_k[nn + 1])
-                                 - (3 * v_points_[nn] - 2 * v_points_[nn + 1]) * std::exp(alpha_k[nn]))
-                              / std::pow(alpha_k[nn] - alpha_k[nn + 1], 3)
-                        - v_points_[nn] * (v_points_[nn] - v_points_[nn + 1]) * std::exp(alpha_k[nn])
-                              / (alpha_k[nn] - alpha_k[nn + 1])
-                        + (std::pow(v_points_[nn + 1], 2) - 4 * v_points_[nn] * v_points_[nn + 1]
-                           + 3. * std::pow(v_points_[nn], 2))
-                              * std::exp(alpha_k[nn]) / std::pow(alpha_k[nn] - alpha_k[nn + 1], 2);
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_k[nn + 1] - alpha_k[nn];
-            size_t ll = 0;
-            auto pow_frac = 1. / 24.;
-            while (ll < 4 || (ll <= max_taylor_order_ - 4 && XT::Common::FloatCmp::ne(update, 0.))) {
-              update = pow_frac * (6 * v_points_[nn] + (2 * ll + 2) * v_points_[nn + 1]);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 4);
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            diag[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha_k[nn]);
-          }
-        } // if (nn < dimRange - 1)
-      } // nn
-    } // void calculate_J(...)
-
-    // calculates ret = J H^{-1}. Both J and H are symmetric tridiagonal, H is positive definite.
-    static void calculate_J_Hinv(MatrixType& ret,
-                                 const RangeType& J_diag,
-                                 const FieldVector<RangeFieldType, dimRange - 1>& J_subdiag,
-                                 RangeType& H_diag,
-                                 FieldVector<RangeFieldType, dimRange - 1>& H_subdiag)
-    {
-      // factorize H = LDL^T, where L is unit lower bidiagonal and D is diagonal
-      // H_diag is overwritten by the diagonal elements of D
-      // H_subdiag is overwritten by the subdiagonal elements of L
-      XT::LA::tridiagonal_ldlt(H_diag, H_subdiag);
-
-      // copy J to dense matrix
-      std::fill(ret.begin(), ret.end(), 0.);
-      for (size_t ii = 0; ii < dimRange - 1; ++ii) {
-        ret[ii][ii] = J_diag[ii];
-        ret[ii + 1][ii] = J_subdiag[ii];
-        ret[ii][ii + 1] = J_subdiag[ii];
-      }
-      ret[dimRange - 1][dimRange - 1] = J_diag[dimRange - 1];
-
-      // Solve ret H = J which is equivalent to (as H and J are symmetric) to H ret^T = J;
-      XT::LA::solve_tridiagonal_ldlt_factorized(H_diag, H_subdiag, ret);
-      // transpose ret
-      for (size_t ii = 0; ii < dimRange; ++ii)
-        for (size_t jj = 0; jj < ii; ++jj)
-          std::swap(ret[jj][ii], ret[ii][jj]);
-    } // void calculate_J_Hinv(...)
-
-    const BasisfunctionType& basis_functions_;
-    const std::vector<RangeFieldType>& v_points_;
-    const RangeFieldType tau_;
-    const RangeFieldType epsilon_gamma_;
-    const RangeFieldType chi_;
-    const RangeFieldType xi_;
-    const std::vector<RangeFieldType>& r_sequence_;
-    const size_t k_0_;
-    const size_t k_max_;
-    const RangeFieldType epsilon_;
-    const RangeFieldType taylor_tol_;
-    const size_t max_taylor_order_;
-    const std::string name_;
-    LocalCacheType& cache_;
-    AlphaStorageType& alpha_storage_;
-    std::mutex& mutex_;
-  }; // class Localfunction>
-
-  static std::string static_id()
-  {
-    return "gdt.entropybasedflux";
+  virtual RangeReturnType evaluate(const DomainType& u,
+                                   const XT::Common::Parameter& /*param*/ = {}) const override final
+  { // Flux for state u: fetch alpha via get_alpha, then delegate to evaluate_with_alpha. param is ignored.
+    const auto alpha = get_alpha(u, get_isotropic_alpha(u), true)->first; // ->first: presumably the alpha vector
+    return evaluate_with_alpha(alpha);
   }
 
-  std::unique_ptr<LocalfunctionType> local_function(const EntityType& entity) const
+  virtual RangeReturnType evaluate_with_alpha(const VectorType& alpha) const
+  { // Evaluates the flux directly from a given alpha (no call to get_alpha).
+    RangeReturnType ret(0.);
+    // calculate ret[0] = < v m G_\alpha(u) > by quadrature per interval, G_\alpha = exp(alpha * m); only ret[0] is set
+    LocalVectorType local_alpha;
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      for (size_t ii = 0; ii < 2; ++ii) // interval jj uses the two entries alpha[jj] and alpha[jj + 1]
+        local_alpha[ii] = alpha[jj + ii];
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto& basis_ll = M_[jj][ll]; // stored basis evaluation at quadrature point ll of interval jj
+        auto factor_ll = std::exp(local_alpha * basis_ll) * quad_points_[jj][ll] * quad_weights_[jj][ll];
+        for (size_t ii = 0; ii < 2; ++ii)
+          ret[0][jj + ii] += basis_ll[ii] * factor_ll;
+      } // ll (quad points)
+    } // jj (intervals)
+    return ret;
+  } // RangeReturnType evaluate_with_alpha(...)
+
+  virtual DerivativeRangeReturnType jacobian(const DomainType& u,
+                                             const XT::Common::Parameter& /*param*/ = {}) const override final
   {
-    return derived_local_function(entity);
+    const auto alpha = get_alpha(u, get_isotropic_alpha(u), true)->first; // same alpha lookup as in evaluate()
+    return jacobian_with_alpha(alpha); // the Jacobian itself is assembled from alpha only
   }
 
-  std::unique_ptr<Localfunction> derived_local_function(const EntityType& entity) const
-  {
-    const auto& index = index_set_.index(entity);
-    return std::make_unique<Localfunction>(entity,
-                                           basis_functions_,
-                                           v_points_,
-                                           tau_,
-                                           epsilon_gamma_,
-                                           chi_,
-                                           xi_,
-                                           r_sequence_,
-                                           k_0_,
-                                           k_max_,
-                                           epsilon_,
-                                           taylor_tol_,
-                                           max_taylor_order_,
-                                           cache_[index],
-                                           alpha_storage_[index],
-                                           mutexes_[index]);
+  virtual DerivativeRangeReturnType jacobian_with_alpha(const VectorType& alpha) const
+  { // Builds the Jacobian from alpha out of two matrices H and J, each held as diagonal + subdiagonal.
+    DerivativeRangeReturnType ret;
+    VectorType H_diag, J_diag;
+    FieldVector<RangeFieldType, dimRange - 1> H_subdiag, J_subdiag;
+    calculate_hessian(alpha, M_, H_diag, H_subdiag);
+    calculate_J(M_, J_diag, J_subdiag); // NOTE(review): receives M_ but not alpha; confirm calculate_J needs no alpha
+    calculate_J_Hinv(ret[0], J_diag, J_subdiag, H_diag, H_subdiag); // presumably ret[0] = J H^{-1}; TODO confirm
+    return ret;
   }
 
-  // calculate \sum_{i=1}^d < v_i_+ m \psi >_+ n_i, where n is the unit outer normal,
+  // calculate \sum_{i=1}^d < v_i m \phi_u > n_i, where n is the unit outer normal,
   // m is the basis function vector, phi_u is the ansatz corresponding to u
   // and x, v, t are the space, velocity and time variable, respectively
   // As we are using cartesian grids, n_i == 0 in all but one dimension, so only evaluate for i == dd
-  StateRangeType evaluate_kinetic_flux(const EntityType& entity,
-                                       const DomainType& x_local_entity,
-                                       const StateRangeType& /*u_i*/,
-                                       const EntityType& neighbor,
-                                       const DomainType& x_local_neighbor,
-                                       const StateRangeType u_j,
-                                       const DomainType& n_ij,
-                                       const size_t DXTC_DEBUG_ONLY(dd),
-                                       const XT::Common::Parameter& /*param*/,
-                                       const XT::Common::Parameter& param_neighbor) const
+  DomainType evaluate_kinetic_flux(const DomainType& u_i,
+                                   const DomainType& u_j,
+                                   const BasisDomainType& n_ij,
+                                   const size_t dd) const
+  { // Kinetic flux for the state pair (u_i, u_j), normal n_ij and direction dd; returns the value computed below.
+    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i, using the alphas obtained for u_i and u_j
+    const auto alpha_i = get_alpha(u_i, get_isotropic_alpha(u_i), true)->first;
+    const auto alpha_j = get_alpha(u_j, get_isotropic_alpha(u_j), true)->first;
+    return evaluate_kinetic_flux_with_alphas(alpha_i, alpha_j, n_ij, dd); // must be returned: function is non-void
+  } // DomainType evaluate_kinetic_flux(...)
+
+  DomainType evaluate_kinetic_flux_with_alphas(const VectorType& alpha_i,
+                                               const VectorType& alpha_j,
+                                               const BasisDomainType& n_ij,
+                                               const size_t dd) const
   {
     assert(dd == 0);
     // calculate < \mu m G_\alpha(u) > * n_ij
-    const auto local_function_entity = derived_local_function(entity);
-    const auto local_function_neighbor = derived_local_function(neighbor);
-    const auto alpha_i = local_function_entity->get_stored_alpha(x_local_entity);
-    StateRangeType alpha_j;
-    const bool boundary = bool(param_neighbor.get("boundary")[0]);
-    if (boundary)
-      alpha_j = local_function_neighbor->get_alpha(x_local_neighbor, u_j, param_neighbor, true)->first;
-    else
-      alpha_j = local_function_neighbor->get_stored_alpha(x_local_neighbor);
-    RangeType ret(0);
-    for (size_t nn = 0; nn < dimRange; ++nn) {
-      if (nn > 0) {
-        if (dimRange % 2 || nn != dimRange / 2) {
-          const auto& alpha = (n_ij[0] * (v_points_[nn - 1] + v_points_[nn]) / 2. > 0.) ? alpha_i : alpha_j;
-          if (std::abs(alpha[nn] - alpha[nn - 1]) > taylor_tol_) {
-            ret[nn] += 2. * std::pow(v_points_[nn] - v_points_[nn - 1], 2) / std::pow(alpha[nn] - alpha[nn - 1], 3)
-                           * (std::exp(alpha[nn]) - std::exp(alpha[nn - 1]))
-                       + (v_points_[nn] - v_points_[nn - 1]) / std::pow(alpha[nn] - alpha[nn - 1], 2)
-                             * (v_points_[nn - 1] * (std::exp(alpha[nn]) + std::exp(alpha[nn - 1]))
-                                - 2 * v_points_[nn] * std::exp(alpha[nn]))
-                       + v_points_[nn] * (v_points_[nn] - v_points_[nn - 1]) / (alpha[nn] - alpha[nn - 1])
-                             * std::exp(alpha[nn]);
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha[nn - 1] - alpha[nn];
-            size_t ll = 0;
-            auto pow_frac = 1. / 6.;
-            while (ll < 3 || (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(update, 0.))) {
-              update = pow_frac * (2 * v_points_[nn] + (ll + 1) * v_points_[nn - 1]);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 3);
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            ret[nn] += result * (v_points_[nn] - v_points_[nn - 1]) * std::exp(alpha[nn]);
-          }
-        } else { //  if (dimRange % 2 || nn != dimRange/2)
-          const auto& alpha_pos = n_ij[0] > 0. ? alpha_i : alpha_j;
-          const auto& alpha_neg = n_ij[0] > 0. ? alpha_j : alpha_i;
-          if (std::abs(alpha_neg[nn] - alpha_neg[nn - 1]) > taylor_tol_) {
-            ret[nn] += -2. * std::pow(v_points_[nn], 2)
-                       * (4. / std::pow(alpha_neg[nn - 1] - alpha_neg[nn], 3)
-                              * (std::exp((alpha_neg[nn] + alpha_neg[nn - 1]) / 2.) - std::exp(alpha_neg[nn - 1]))
-                          + 1. / std::pow(alpha_neg[nn - 1] - alpha_neg[nn], 2)
-                                * (std::exp((alpha_neg[nn] + alpha_neg[nn - 1]) / 2.) + std::exp(alpha_neg[nn - 1])));
-
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_neg[nn] - alpha_neg[nn - 1];
-            size_t ll = 2;
-            auto pow_frac = 1. / 24.;
-            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac * (ll - 1.);
-              result += update;
-              ++ll;
-              pow_frac *= base / (2. * (ll + 1));
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            ret[nn] += result * -2. * std::pow(v_points_[nn], 2) * std::exp(alpha_neg[nn - 1]);
-          }
-          if (std::abs(alpha_pos[nn] - alpha_pos[nn - 1]) > taylor_tol_) {
-            ret[nn] += 2. * std::pow(v_points_[nn], 2)
-                       * (4. / std::pow(alpha_pos[nn - 1] - alpha_pos[nn], 3)
-                              * (std::exp((alpha_pos[nn] + alpha_pos[nn - 1]) / 2.) - std::exp(alpha_pos[nn]))
-                          + 1. / std::pow(alpha_pos[nn - 1] - alpha_pos[nn], 2)
-                                * (std::exp((alpha_pos[nn] + alpha_pos[nn - 1]) / 2.) - 3. * std::exp(alpha_pos[nn]))
-                          - 1. / (alpha_pos[nn - 1] - alpha_pos[nn]) * std::exp(alpha_pos[nn]));
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_pos[nn - 1] - alpha_pos[nn];
-            auto pow_frac = 1. / 24.;
-            size_t ll = 2;
-            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac * (ll + 3);
-              result += update;
-              ++ll;
-              pow_frac *= base / (2. * (ll + 1));
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            ret[nn] += result * 2. * std::pow(v_points_[nn], 2) * std::exp(alpha_pos[nn]);
-          } // else (alpha_n - alpha_{n-1} != 0)
-        } // else (dimRange % 2 || nn != dimRange/2)
-      } // if (nn > 0)
-      if (nn < dimRange - 1) {
-        if (dimRange % 2 || nn != dimRange / 2 - 1) {
-          const auto& alpha = (n_ij[0] * (v_points_[nn] + v_points_[nn + 1]) / 2. > 0.) ? alpha_i : alpha_j;
-          if (XT::Common::FloatCmp::ne(alpha[nn + 1], alpha[nn], 0., taylor_tol_)) {
-            ret[nn] += -2. * std::pow(v_points_[nn + 1] - v_points_[nn], 2) / std::pow(alpha[nn + 1] - alpha[nn], 3)
-                           * (std::exp(alpha[nn + 1]) - std::exp(alpha[nn]))
-                       + (v_points_[nn + 1] - v_points_[nn]) / std::pow(alpha[nn + 1] - alpha[nn], 2)
-                             * (v_points_[nn + 1] * (std::exp(alpha[nn + 1]) + std::exp(alpha[nn]))
-                                - 2 * v_points_[nn] * std::exp(alpha[nn]))
-                       - v_points_[nn] * (v_points_[nn + 1] - v_points_[nn]) / (alpha[nn + 1] - alpha[nn])
-                             * std::exp(alpha[nn]);
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha[nn + 1] - alpha[nn];
-            size_t ll = 0;
-            auto pow_frac = 1. / 6.;
-            while (ll < 3
-                   || (ll <= max_taylor_order_ - 3 && XT::Common::FloatCmp::ne(result, result + update, 1e-16, 0.))) {
-              update = pow_frac * (2 * v_points_[nn] + (ll + 1) * v_points_[nn + 1]);
-              result += update;
-              ++ll;
-              pow_frac *= base / (ll + 3);
-            } // ll
-            ret[nn] += result * (v_points_[nn + 1] - v_points_[nn]) * std::exp(alpha[nn]);
-          }
-        } else { // if (dimRange % 2 || nn != dimRange / 2 - 1)
-          const auto& alpha_pos = n_ij[0] > 0. ? alpha_i : alpha_j;
-          const auto& alpha_neg = n_ij[0] > 0. ? alpha_j : alpha_i;
-          if (std::abs(alpha_neg[nn + 1] - alpha_neg[nn]) > taylor_tol_) {
-            ret[nn] += -2. * std::pow(v_points_[nn + 1], 2)
-                       * (-4. / std::pow(alpha_neg[nn + 1] - alpha_neg[nn], 3)
-                              * (std::exp(alpha_neg[nn]) - std::exp((alpha_neg[nn + 1] + alpha_neg[nn]) / 2.))
-                          - 1. / std::pow(alpha_neg[nn + 1] - alpha_neg[nn], 2)
-                                * (3 * std::exp(alpha_neg[nn]) - std::exp((alpha_neg[nn + 1] + alpha_neg[nn]) / 2.))
-                          - 1. / (alpha_neg[nn + 1] - alpha_neg[nn]) * std::exp(alpha_neg[nn]));
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_neg[nn + 1] - alpha_neg[nn];
-            auto pow_frac = 1. / 24.;
-            size_t ll = 2;
-            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac * (ll + 3);
-              result += update;
-              ++ll;
-              pow_frac *= base / (2. * (ll + 1));
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            ret[nn] += result * -2. * std::pow(v_points_[nn + 1], 2) * std::exp(alpha_neg[nn]);
-          }
-          if (std::abs(alpha_pos[nn + 1] - alpha_pos[nn]) > taylor_tol_) {
-            ret[nn] += 2. * std::pow(v_points_[nn + 1], 2)
-                       * (4. / std::pow(alpha_pos[nn + 1] - alpha_pos[nn], 3)
-                              * (std::exp((alpha_pos[nn + 1] + alpha_pos[nn]) / 2.) - std::exp(alpha_pos[nn + 1]))
-                          + 1. / std::pow(alpha_pos[nn + 1] - alpha_pos[nn], 2)
-                                * (std::exp((alpha_pos[nn + 1] + alpha_pos[nn]) / 2.) + std::exp(alpha_pos[nn + 1])));
-          } else {
-            RangeFieldType update = 1.;
-            RangeFieldType result = 0.;
-            RangeFieldType base = alpha_pos[nn] - alpha_pos[nn + 1];
-            auto pow_frac = 1. / 24.;
-            size_t ll = 2;
-            while (ll <= max_taylor_order_ - 1 && XT::Common::FloatCmp::ne(update, 0.)) {
-              update = pow_frac * (ll - 1.);
-              result += update;
-              ++ll;
-              pow_frac *= base / (2. * (ll + 1));
-            } // ll
-            assert(!(std::isinf(pow_frac) || std::isnan(pow_frac)));
-            ret[nn] += result * 2. * std::pow(v_points_[nn + 1], 2) * std::exp(alpha_pos[nn + 1]);
-          } // else (alpha_n - alpha_{n-1} != 0)
-        } // else (dimRange % 2 || nn != dimRange / 2 - 1)
-      } // if (nn < dimRange - 1)
-    } // nn
+    DomainType ret(0);
+    LocalVectorType local_alpha_i, local_alpha_j;
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      for (size_t ii = 0; ii < 2; ++ii) {
+        local_alpha_i[ii] = alpha_i[jj + ii];
+        local_alpha_j[ii] = alpha_j[jj + ii];
+      }
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto& basis_ll = M_[jj][ll];
+        const auto position = quad_points_[jj][ll];
+        RangeFieldType factor =
+            position * n_ij[0] > 0. ? std::exp(local_alpha_i * basis_ll) : std::exp(local_alpha_j * basis_ll);
+        factor *= quad_weights_[jj][ll] * position;
+        for (size_t ii = 0; ii < 2; ++ii)
+          ret[jj + ii] += basis_ll[ii] * factor;
+      } // ll (quad points)
+    } // jj (intervals)
     ret *= n_ij[0];
     return ret;
-  } // StateRangeType evaluate_kinetic_flux(...)
+  } // ... evaluate_kinetic_flux(...)
+
+  // Minimizes f(alpha) = <exp(alpha * m)> - alpha * v (see calculate_f) by Newton's method with a backtracking line
+  // search, starting from alpha_in. If regularize is true, the values in r_sequence_ are tried in order until the
+  // iteration converges, otherwise only r = 0 is used. Throws Dune::MathError on invalid density or non-convergence.
+  // returns (alpha, (actual_u, r)), where r is the regularization parameter and actual_u the regularized u
+  std::unique_ptr<AlphaReturnType>
+  get_alpha(const DomainType& u, const VectorType& alpha_in, const bool regularize) const
+  {
+    auto ret = std::make_unique<AlphaReturnType>();
+    // rescale u such that the density <psi> is 1
+    RangeFieldType density = basis_functions_.density(u);
+    if (!(density > 0.) || std::isinf(density))
+      DUNE_THROW(Dune::MathError, "Negative, inf or NaN density!");
+    static const auto alpha_iso_prime = basis_functions_.alpha_iso_prime();
+    VectorType u_prime = u / density;
+    VectorType alpha_initial = alpha_in - alpha_iso_prime * std::log(density);
+    RangeFieldType tau_prime =
+        std::min(tau_ / ((1 + std::sqrt(dimRange) * u_prime.two_norm()) * density + std::sqrt(dimRange) * tau_), tau_);
+    // The hessian H is always symmetric and tridiagonal, so we only store its diagonal and subdiagonal elements
+    VectorType H_diag;
+    FieldVector<RangeFieldType, dimRange - 1> H_subdiag;
+
+    // calculate moment vector for isotropic distribution
+    VectorType u_iso = basis_functions_.u_iso();
+    VectorType v;
+    VectorType alpha_k = alpha_initial;
+
+    // only r = 0 is tried if regularization is disabled
+    const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
+    const auto r_max = r_sequence.back();
+    for (const auto& r : r_sequence) {
+      // regularize u
+      v = u_prime;
+      if (r > 0) {
+        alpha_k = get_isotropic_alpha(u);
+        VectorType r_times_u_iso(u_iso);
+        r_times_u_iso *= r;
+        v *= 1 - r;
+        v += r_times_u_iso;
+      }
+
+      RangeFieldType f_k = calculate_f(alpha_k, v);
+
+      int pure_newton = 0;
+      VectorType g_k, d_k, minus_g_k, u_alpha_prime;
+      for (size_t kk = 0; kk < k_max_; ++kk) {
+        // exit inner for loop to increase r if too many iterations are used
+        if (kk > k_0_ && r < r_max)
+          break;
+        // calculate gradient g and Hessian H
+        calculate_gradient(alpha_k, v, g_k);
+        calculate_hessian(alpha_k, M_, H_diag, H_subdiag);
+        // calculate descent direction d_k;
+        minus_g_k = g_k;
+        minus_g_k *= -1;
+        try {
+          d_k = minus_g_k;
+          XT::LA::solve_sym_tridiag_posdef(H_diag, H_subdiag, d_k);
+        } catch (const Dune::MathError&) {
+          if (r < r_max)
+            break;
+          else
+            DUNE_THROW(Dune::MathError, "Failure to converge!");
+        }
+
+        const auto& alpha_tilde = alpha_k;
+        const auto u_alpha_tilde = g_k + v;
+        auto density_tilde = basis_functions_.density(u_alpha_tilde);
+        if (!(density_tilde > 0.) || std::isinf(density_tilde))
+          break;
+        const auto alpha_prime = alpha_tilde - alpha_iso_prime * std::log(density_tilde);
+        calculate_u(alpha_prime, u_alpha_prime);
+        auto u_eps_diff = v - u_alpha_prime * (1 - epsilon_gamma_);
+        // checking realizability is cheap so we do not need the second stopping criterion
+        if (g_k.two_norm() < tau_prime && is_realizable(u_eps_diff)) {
+          ret->first = alpha_prime + alpha_iso_prime * std::log(density);
+          ret->second = std::make_pair(v * density, r);
+          return ret;
+        } else {
+          RangeFieldType zeta_k = 1;
+          // backtracking line search
+          while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm()) {
+            // while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.) {
+            // calculate alpha_new = alpha_k + zeta_k d_k
+            auto alpha_new = d_k;
+            alpha_new *= zeta_k;
+            alpha_new += alpha_k;
+            RangeFieldType f_new = calculate_f(alpha_new, v);
+            if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
+              alpha_k = alpha_new;
+              f_k = f_new;
+              pure_newton = 0;
+              break;
+            }
+            zeta_k = chi_ * zeta_k;
+          } // backtracking linesearch while
+          // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
+          if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm())
+            ++pure_newton;
+        } // else (stopping conditions)
+      } // k loop (Newton iterations)
+    } // r loop (Regularization parameter)
+    DUNE_THROW(MathError, "Failed to converge");
+    return ret;
+  } // ... get_alpha(...)
 
-  const BasisfunctionType& basis_functions() const
+  const MomentBasis& basis_functions() const
   {
     return basis_functions_;
   }
 
 private:
-  const typename GridLayerType::IndexSet& index_set_;
-  const BasisfunctionType& basis_functions_;
-  const std::vector<RangeFieldType>& v_points_;
+  // u is accepted if all of its entries are positive and finite, in particular NaN entries are rejected
+  static bool is_realizable(const DomainType& u)
+  {
+    return std::all_of(u.begin(), u.end(), [](const auto& entry) {
+      return entry > 0. && !std::isinf(entry);
+    });
+  }
+
+  // thread_local scratch storage (one value per quadrature point), filled in calculate_hessian, read in calculate_J
+  FieldVector<std::vector<RangeFieldType>, num_intervals>& working_storage() const
+  {
+    thread_local FieldVector<std::vector<RangeFieldType>, num_intervals> work_vec;
+    for (size_t jj = 0; jj < num_intervals; ++jj)
+      work_vec[jj].resize(quad_points_[jj].size());
+    return work_vec;
+  }
+
+  // evaluates the objective function f(alpha) = <exp(alpha * m)> - alpha * v, using the quadrature on each interval
+  RangeFieldType calculate_f(const VectorType& alpha, const VectorType& v) const
+  {
+    RangeFieldType integral(0.);
+    XT::Common::FieldVector<RangeFieldType, block_size> alpha_jj;
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      alpha_jj[0] = alpha[jj];
+      alpha_jj[1] = alpha[jj + 1];
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll)
+        integral += std::exp(alpha_jj * M_[jj][ll]) * quad_weights_[jj][ll];
+    } // jj (intervals)
+    return integral - alpha * v;
+  } // ... calculate_f(...)
+
+  // calculates u = <m exp(alpha * m)> by quadrature, interval jj only contributes to the entries jj and jj + 1
+  void calculate_u(const VectorType& alpha, VectorType& u) const
+  {
+    std::fill(u.begin(), u.end(), 0.);
+    LocalVectorType alpha_jj;
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      alpha_jj[0] = alpha[jj];
+      alpha_jj[1] = alpha[jj + 1];
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto weighted_exp = std::exp(alpha_jj * M_[jj][ll]) * quad_weights_[jj][ll];
+        u[jj] += M_[jj][ll][0] * weighted_exp;
+        u[jj + 1] += M_[jj][ll][1] * weighted_exp;
+      } // ll (quad points)
+    } // jj (intervals)
+  } // void calculate_u(...)
+
+  void calculate_gradient(const VectorType& alpha, const VectorType& v, VectorType& g_k) const
+  {
+    calculate_u(alpha, g_k);
+    g_k -= v; // g_k = u(alpha) - v
+  } // void calculate_gradient(...)
+
+  // assembles the diagonal and subdiagonal of the tridiagonal matrix H = <m m^T exp(alpha * m)> by quadrature
+  void calculate_hessian(const VectorType& alpha,
+                         const BasisValuesMatrixType& M,
+                         VectorType& H_diag,
+                         FieldVector<RangeFieldType, dimRange - 1>& H_subdiag) const
+  {
+    std::fill(H_diag.begin(), H_diag.end(), 0.);
+    std::fill(H_subdiag.begin(), H_subdiag.end(), 0.);
+    LocalVectorType alpha_jj;
+    auto& weighted_exps = working_storage();
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      alpha_jj[0] = alpha[jj];
+      alpha_jj[1] = alpha[jj + 1];
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        weighted_exps[jj][ll] = std::exp(alpha_jj * M[jj][ll]) * quad_weights_[jj][ll]; // reused by calculate_J
+        H_diag[jj] += std::pow(M[jj][ll][0], 2) * weighted_exps[jj][ll];
+        H_diag[jj + 1] += std::pow(M[jj][ll][1], 2) * weighted_exps[jj][ll];
+        H_subdiag[jj] += M[jj][ll][0] * M[jj][ll][1] * weighted_exps[jj][ll];
+      } // ll (quad points)
+    } // jj (intervals)
+  } // void calculate_hessian(...)
+
+  // J = df/dalpha is the derivative of the flux with respect to alpha.
+  // As F = (f_1, f_2, f_3) is matrix-valued
+  // (div f = \sum_{i=1}^d \partial_{x_i} f_i = \sum_{i=1}^d \partial_{x_i} < v_i m \hat{psi}(alpha) > is
+  // vector-valued), the derivative is in general the vector of matrices (df_1/dalpha, df_2/dalpha, ...).
+  // Here, there is only one velocity component (the quadrature point), so a single symmetric tridiagonal matrix
+  // <v m m^T exp(alpha * m)> is assembled and returned via its diagonal J_diag and subdiagonal J_subdiag.
+  // Assumes that the working storage still contains the weighted exponentials from calculate_hessian (same alpha).
+  void calculate_J(const BasisValuesMatrixType& M,
+                   VectorType& J_diag,
+                   FieldVector<RangeFieldType, dimRange - 1>& J_subdiag) const
+  {
+    std::fill(J_diag.begin(), J_diag.end(), 0.);
+    std::fill(J_subdiag.begin(), J_subdiag.end(), 0.);
+    const auto& work_vecs = working_storage();
+    for (size_t jj = 0; jj < num_intervals; ++jj) {
+      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
+        const auto& basis_ll = M[jj][ll];
+        for (size_t ii = 0; ii < 2; ++ii)
+          J_diag[jj + ii] += std::pow(basis_ll[ii], 2) * work_vecs[jj][ll] * quad_points_[jj][ll];
+        J_subdiag[jj] += basis_ll[0] * basis_ll[1] * work_vecs[jj][ll] * quad_points_[jj][ll];
+      } // ll (quad points)
+    } // jj (intervals)
+  } // void calculate_J(...)
+
+  // calculates ret = J H^{-1}. Both J and H are symmetric tridiagonal, H is positive definite (and overwritten).
+  static void calculate_J_Hinv(MatrixType& ret,
+                               const VectorType& J_diag,
+                               const FieldVector<RangeFieldType, dimRange - 1>& J_subdiag,
+                               VectorType& H_diag,
+                               FieldVector<RangeFieldType, dimRange - 1>& H_subdiag)
+  {
+    // factorize H = LDL^T, where L is unit lower bidiagonal and D is diagonal
+    // H_diag is overwritten by the diagonal elements of D
+    // H_subdiag is overwritten by the subdiagonal elements of L
+    XT::LA::tridiagonal_ldlt(H_diag, H_subdiag);
+
+    // copy the tridiagonal matrix J to the dense matrix ret
+    std::fill(ret.begin(), ret.end(), 0.);
+    for (size_t ii = 0; ii < dimRange - 1; ++ii) {
+      ret[ii][ii] = J_diag[ii];
+      ret[ii + 1][ii] = J_subdiag[ii];
+      ret[ii][ii + 1] = J_subdiag[ii];
+    }
+    ret[dimRange - 1][dimRange - 1] = J_diag[dimRange - 1];
+
+    // Solve ret H = J, which (as H and J are symmetric) is equivalent to H ret^T = J.
+    XT::LA::solve_tridiagonal_ldlt_factorized(H_diag, H_subdiag, ret);
+    // ret now contains ret^T, so transpose it in-place
+    for (size_t ii = 0; ii < dimRange; ++ii)
+      for (size_t jj = 0; jj < ii; ++jj)
+        std::swap(ret[jj][ii], ret[ii][jj]);
+  } // void calculate_J_Hinv(...)
+
+private:
+  const MomentBasis& basis_functions_;
+  QuadraturePointsType quad_points_;
+  QuadratureWeightsType quad_weights_;
+  const std::vector<RangeFieldType>& grid_points_;
+  BasisValuesMatrixType M_;
   const RangeFieldType tau_;
   const RangeFieldType epsilon_gamma_;
   const RangeFieldType chi_;
@@ -4192,634 +3449,286 @@ private:
   const size_t k_0_;
   const size_t k_max_;
   const RangeFieldType epsilon_;
-  const RangeFieldType taylor_tol_;
-  const size_t max_taylor_order_;
-  const std::string name_;
-  // Use unique_ptr in the vectors to avoid the memory cost for storing twice as many matrices or vectors as needed
-  // (see constructor)
-  mutable std::vector<LocalCacheType> cache_;
-  mutable std::vector<AlphaStorageType> alpha_storage_;
-  mutable std::vector<std::mutex> mutexes_;
 };
-#  endif
+#endif
 
 
-#  if 0
-/**
- * Specialization of EntropyBasedLocalFlux for 1D Hatfunctions (no change of basis)
- */
-template <class GridLayerImp, class U>
-class EntropyBasedLocalFlux<
-    HatFunctionMomentBasis<typename U::DomainFieldType, 1, typename U::RangeFieldType, U::dimRange, 1, 1>,
-    GridLayerImp,
-    U>
-  : public XT::Functions::LocalizableFluxFunctionInterface<typename GridLayerImp::template Codim<0>::Entity,
-                                                           typename U::DomainFieldType,
-                                                           GridLayerImp::dimension,
-                                                           U,
-                                                           0,
-                                                           typename U::RangeFieldType,
-                                                           U::dimRange,
-                                                           1>
+template <class KeyVectorType, class ValueVectorType>
+class EntropyLocalCache
 {
-  using BaseType =
-      typename XT::Functions::LocalizableFluxFunctionInterface<typename GridLayerImp::template Codim<0>::Entity,
-                                                               typename U::DomainFieldType,
-                                                               GridLayerImp::dimension,
-                                                               U,
-                                                               0,
-                                                               typename U::RangeFieldType,
-                                                               U::dimRange,
-                                                               1>;
-  using ThisType = EntropyBasedLocalFlux;
-
 public:
-  using BaseType::dimDomain;
-  using BaseType::dimRange;
-  using BaseType::dimRangeCols;
-  using typename BaseType::DomainFieldType;
-  using typename BaseType::DomainType;
-  using typename BaseType::EntityType;
-  using typename BaseType::LocalfunctionType;
-  using typename BaseType::PartialURangeType;
-  using typename BaseType::RangeFieldType;
-  using typename BaseType::RangeType;
-  using typename BaseType::StateRangeType;
-  using typename BaseType::StateType;
-  using BasisfunctionType = HatFunctionMomentBasis<DomainFieldType, 1, RangeFieldType, dimRange, 1, 1>;
-  using GridLayerType = GridLayerImp;
-  using QuadratureRuleType = Dune::QuadratureRule<DomainFieldType, 1>;
-  using MatrixType = FieldMatrix<RangeFieldType, dimRange, dimRange>;
-  using AlphaReturnType = typename std::pair<StateRangeType, RangeFieldType>;
-  using LocalCacheType = EntropyLocalCache<StateRangeType, StateRangeType>;
-  using AlphaStorageType = std::map<DomainType, StateRangeType, XT::Common::VectorFloatLess>;
-  static const size_t cache_size = 4 * dimDomain + 2;
-  static const size_t num_intervals = dimRange - 1;
-  static const size_t block_size = 2;
-  using LocalVectorType = XT::Common::FieldVector<RangeFieldType, block_size>;
-  using BasisValuesMatrixType = FieldVector<std::vector<LocalVectorType>, num_intervals>;
-  using QuadraturePointsType = FieldVector<std::vector<RangeFieldType>, num_intervals>;
-  using QuadratureWeightsType = FieldVector<std::vector<RangeFieldType>, num_intervals>;
+  using MapType = typename std::map<KeyVectorType, ValueVectorType, XT::Common::VectorLess>;
+  using IteratorType = typename MapType::iterator;
+  using ConstIteratorType = typename MapType::const_iterator;
+  using RangeFieldType = typename XT::Common::VectorAbstraction<KeyVectorType>::ScalarType;
 
-  explicit EntropyBasedLocalFlux(
-      const BasisfunctionType& basis_functions,
-      const GridLayerType& grid_layer,
-      const RangeFieldType tau = 1e-9,
-      const RangeFieldType epsilon_gamma = 0.01,
-      const RangeFieldType chi = 0.5,
-      const RangeFieldType xi = 1e-3,
-      const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
-      const size_t k_0 = 500,
-      const size_t k_max = 1000,
-      const RangeFieldType epsilon = std::pow(2, -52),
-      const std::string name = static_id())
-    : index_set_(grid_layer.indexSet())
-    , basis_functions_(basis_functions)
-    , grid_points_(basis_functions_.triangulation())
-    , tau_(tau)
-    , epsilon_gamma_(epsilon_gamma)
-    , chi_(chi)
-    , xi_(xi)
-    , r_sequence_(r_sequence)
-    , k_0_(k_0)
-    , k_max_(k_max)
-    , epsilon_(epsilon)
-    , name_(name)
-    , cache_(index_set_.size(0), LocalCacheType(cache_size))
-    , alpha_storage_(index_set_.size(0))
-    , mutexes_(index_set_.size(0))
+  EntropyLocalCache(const size_t capacity = 0)
+    : capacity_(capacity)
+  {}
+
+  void insert(const KeyVectorType& u, const ValueVectorType& alpha)
   {
-    const auto& quadratures = basis_functions_.quadratures();
-    assert(quadratures.size() == grid_points_.size() - 1);
-    for (size_t jj = 0; jj < num_intervals; ++jj) {
-      for (const auto& quad_point : quadratures[jj]) {
-        quad_points_[jj].emplace_back(quad_point.position()[0]);
-        quad_weights_[jj].emplace_back(quad_point.weight());
+    if (cache_.insert(std::make_pair(u, alpha)).second) // u was not in the cache yet
+      keys_.push_back(u); // keys_ holds the insertion order, a duplicate would break the eviction below
+    if (cache_.size() > capacity_) { // evict the oldest entry
+      cache_.erase(keys_.front());
+      keys_.pop_front();
+    }
+  }
+
+  std::pair<RangeFieldType, ConstIteratorType> find_closest(const KeyVectorType& u) const
+  {
+    ConstIteratorType ret = cache_.begin();
+    if (ret == end())
+      return std::make_pair(std::numeric_limits<RangeFieldType>::max(), ret); // empty cache
+    auto diff = u - ret->first;
+    // linear search for the key closest to u, the distance is measured in the infinity norm
+    RangeFieldType distance = infinity_norm(diff);
+    auto it = ret;
+    while (++it != end()) {
+      if (XT::Common::FloatCmp::eq(distance, 0.))
+        break; // cannot get any closer
+      diff = u - it->first;
+      RangeFieldType new_distance = infinity_norm(diff);
+      if (new_distance < distance) {
+        distance = new_distance;
+        ret = it;
       }
-    } // jj
-    for (size_t jj = 0; jj < num_intervals; ++jj) {
-      M_[jj].resize(quad_points_[jj].size());
-      for (size_t ll = 0; ll < quad_points_[jj].size(); ++ll)
-        M_[jj][ll] = basis_functions_.evaluate_on_interval(quad_points_[jj][ll], jj);
-    } // jj
+    }
+    return std::make_pair(distance, ret);
+  }
+
+  IteratorType begin()
+  {
+    return cache_.begin();
+  }
+
+  ConstIteratorType begin() const
+  {
+    return cache_.begin();
+  }
+
+  IteratorType end()
+  {
+    return cache_.end();
+  }
+
+  ConstIteratorType end() const
+  {
+    return cache_.end();
+  }
+
+private:
+  static RangeFieldType infinity_norm(const KeyVectorType& vec)
+  {
+    RangeFieldType ret = std::abs(vec[0]);
+    for (size_t ii = 1; ii < vec.size(); ++ii)
+      ret = std::max(ret, std::abs(vec[ii]));
+    return ret;
   }
 
-  class Localfunction : public LocalfunctionType
-  {
-  public:
-    using LocalfunctionType::dimDomain;
-    using typename LocalfunctionType::ColPartialURangeType;
-    using typename LocalfunctionType::ColRangeType;
-
-    Localfunction(const EntityType& e,
-                  const BasisfunctionType& basis_functions,
-                  const QuadraturePointsType& quad_points,
-                  const QuadratureWeightsType& quad_weights,
-                  const std::vector<RangeFieldType>& grid_points,
-                  const BasisValuesMatrixType& M,
-                  const RangeFieldType tau,
-                  const RangeFieldType epsilon_gamma,
-                  const RangeFieldType chi,
-                  const RangeFieldType xi,
-                  const std::vector<RangeFieldType>& r_sequence,
-                  const size_t k_0,
-                  const size_t k_max,
-                  const RangeFieldType epsilon,
-                  LocalCacheType& cache,
-                  AlphaStorageType& alpha_storage,
-                  std::mutex& mutex)
-      : LocalfunctionType(e)
-      , basis_functions_(basis_functions)
-      , quad_points_(quad_points)
-      , quad_weights_(quad_weights)
-      , grid_points_(grid_points)
-      , M_(M)
-      , tau_(tau)
-      , epsilon_gamma_(epsilon_gamma)
-      , chi_(chi)
-      , xi_(xi)
-      , r_sequence_(r_sequence)
-      , k_0_(k_0)
-      , k_max_(k_max)
-      , epsilon_(epsilon)
-      , cache_(cache)
-      , alpha_storage_(alpha_storage)
-      , mutex_(mutex)
-    {}
+  size_t capacity_;
+  MapType cache_;
+  std::list<KeyVectorType> keys_;
+};
+
+
+template <class GridViewImp, class MomentBasisImp>
+class EntropyBasedFluxFunction
+  : public XT::Functions::FluxFunctionInterface<XT::Grid::extract_entity_t<GridViewImp>,
+                                                MomentBasisImp::dimRange,
+                                                MomentBasisImp::dimDomain,
+                                                MomentBasisImp::dimRange,
+                                                typename MomentBasisImp::R>
+{
+  using BaseType = typename XT::Functions::FluxFunctionInterface<XT::Grid::extract_entity_t<GridViewImp>,
+                                                                 MomentBasisImp::dimRange,
+                                                                 MomentBasisImp::dimDomain,
+                                                                 MomentBasisImp::dimRange,
+                                                                 typename MomentBasisImp::R>;
+  using ThisType = EntropyBasedFluxFunction;
+
+public:
+  using GridViewType = GridViewImp;
+  using MomentBasis = MomentBasisImp;
+  using IndexSetType = typename GridViewType::IndexSet;
+  static const size_t basis_dimDomain = MomentBasis::dimDomain;
+  static const size_t basis_dimRange = MomentBasis::dimRange;
+  using typename BaseType::DomainType;
+  using typename BaseType::E;
+  using typename BaseType::LocalFunctionType;
+  using typename BaseType::RangeFieldType;
+  using typename BaseType::StateType;
+  using ImplementationType = EntropyBasedFluxImplementation<MomentBasis>;
+  using AlphaReturnType = typename ImplementationType::AlphaReturnType;
+  using VectorType = typename ImplementationType::VectorType;
+  using LocalCacheType = EntropyLocalCache<StateType, VectorType>;
+  static const size_t cache_size = 4 * basis_dimDomain + 2;
+
+  explicit EntropyBasedFluxFunction(
+      const GridViewType& grid_view,
+      const MomentBasis& basis_functions,
+      const RangeFieldType tau = 1e-9,
+      const RangeFieldType epsilon_gamma = 0.01,
+      const RangeFieldType chi = 0.5,
+      const RangeFieldType xi = 1e-3,
+      const std::vector<RangeFieldType> r_sequence = {0, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 5e-2, 0.1, 0.5, 1},
+      const size_t k_0 = 500,
+      const size_t k_max = 1000,
+      const RangeFieldType epsilon = std::pow(2, -52))
+    : index_set_(grid_view.indexSet())
+    , entity_caches_(index_set_.size(0), LocalCacheType(cache_size))
+    , mutexes_(index_set_.size(0))
+    , implementation_(basis_functions, tau, epsilon_gamma, chi, xi, r_sequence, k_0, k_max, epsilon)
+  {}
 
-    using LocalfunctionType::entity;
+  static const constexpr bool available = true;
 
-    static bool is_realizable(const RangeType& u)
-    {
-      for (const auto& u_i : u)
-        if (!(u_i > 0.) || std::isinf(u_i))
-          return false;
-      return true;
-    }
+  class Localfunction : public LocalFunctionType
+  {
+    using BaseType = LocalFunctionType;
 
-    void store_alpha(const DomainType& x_local, const StateRangeType& alpha)
-    {
-      alpha_storage_[x_local] = alpha;
-    }
+  public:
+    using typename BaseType::E;
+    using typename BaseType::JacobianRangeReturnType;
+    using typename BaseType::RangeReturnType;
 
-    StateRangeType get_stored_alpha(const DomainType& x_local) const
-    {
-      return alpha_storage_.at(x_local);
-    }
+    Localfunction(const IndexSetType& index_set,
+                  std::vector<LocalCacheType>& entity_caches,
+                  std::vector<std::mutex>& mutexes,
+                  const ImplementationType& implementation)
+      : index_set_(index_set)
+      , thread_cache_(cache_size)
+      , entity_caches_(entity_caches)
+      , mutexes_(mutexes)
+      , implementation_(implementation)
+    {}
 
-    // temporary vectors to store inner products and exponentials
-    FieldVector<std::vector<RangeFieldType>, num_intervals>& working_storage() const
+    virtual void post_bind(const E& element) override final
     {
-      thread_local FieldVector<std::vector<RangeFieldType>, num_intervals> work_vec;
-      for (size_t jj = 0; jj < num_intervals; ++jj)
-        work_vec[jj].resize(quad_points_[jj].size());
-      return work_vec;
+      const auto index = index_set_.index(element);
+      entity_cache_ = &(entity_caches_[index]);
+      mutex_ = &(mutexes_[index]);
     }
 
-    template <class GridLayerType>
-    void center_results_to_intersections(const GridLayerType& grid_layer)
+    virtual int order(const XT::Common::Parameter&) const override final
     {
-      const auto center = entity().geometry().local(entity().geometry().center());
-      const auto center_alpha = get_stored_alpha(center);
-      for (const auto& intersection : Dune::intersections(grid_layer, entity()))
-        store_alpha(entity().geometry().local(intersection.geometry().center()), center_alpha);
+      return 1.;
     }
 
-    std::unique_ptr<AlphaReturnType> get_alpha(const DomainType& x_local,
-                                               const StateRangeType& u,
-                                               const XT::Common::Parameter& param,
-                                               const bool regularize) const
+    std::unique_ptr<AlphaReturnType> get_alpha(const StateType& u, const bool regularize) const
     {
-      const bool boundary = bool(param.get("boundary")[0]);
-      auto ret = std::make_unique<AlphaReturnType>();
-      mutex_.lock();
-      if (boundary)
-        cache_.set_capacity(cache_size + dimDomain);
-
-      // rescale u such that the density <psi> is 1
-      RangeFieldType density = basis_functions_.density(u);
-      if (!(density > 0.) || std::isinf(density)) {
-        mutex_.unlock();
-        DUNE_THROW(Dune::MathError, "Negative density!");
+      // find starting point. Candidates: alpha_iso and the entries in the two caches
+      std::lock_guard<std::mutex> DUNE_UNUSED(guard)(*mutex_);
+      const auto& basis_functions = implementation_.basis_functions();
+      static const auto u_iso = basis_functions.u_iso();
+      static const auto alpha_iso = basis_functions.alpha_iso();
+      static const auto alpha_iso_prime = basis_functions.alpha_iso_prime();
+      const auto density = basis_functions.density(u);
+      const auto u_iso_scaled = u_iso * density;
+      // calculate (inf-norm) distance to isotropic moment with same density
+      RangeFieldType distance = (u - u_iso_scaled).infinity_norm();
+      VectorType alpha_start = XT::Common::convert_to<VectorType>(alpha_iso + alpha_iso_prime * std::log(density));
+      if (!XT::Common::FloatCmp::eq(distance, 0.)) {
+        // calculate distance to closest moment in entity_cache
+        const auto entity_cache_dist_and_it = entity_cache_->find_closest(u);
+        const auto& entity_cache_dist = entity_cache_dist_and_it.first;
+        if (entity_cache_dist < distance) {
+          distance = entity_cache_dist;
+          alpha_start = entity_cache_dist_and_it.second->second;
+        }
+        if (!XT::Common::FloatCmp::eq(distance, 0.)) {
+          // calculate distance to closest moment in thread_cache
+          const auto thread_cache_dist_and_it = thread_cache_.find_closest(u);
+          const auto& thread_cache_dist = thread_cache_dist_and_it.first;
+          if (thread_cache_dist < distance) {
+            distance = thread_cache_dist;
+            alpha_start = thread_cache_dist_and_it.second->second;
+          }
+        }
       }
-      RangeType u_prime = u / density;
-      RangeType alpha_iso = basis_functions_.alpha_iso();
-
-      // if value has already been calculated for these values, skip computation
-      const auto cache_iterator = cache_.find_closest(u_prime);
-      if (cache_iterator != cache_.end() && XT::Common::FloatCmp::eq(cache_iterator->first, u_prime, 1e-14, 1e-14)) {
-        const auto alpha_prime = cache_iterator->second;
-        ret->first = alpha_prime + alpha_iso * std::log(density);
-        ret->second = 0.;
-        alpha_storage_[x_local] = ret->first;
-        mutex_.unlock();
-        return ret;
+      // If alpha_start is already the solution, we are finished. Else start optimization.
+      if (XT::Common::FloatCmp::eq(distance, 0.)) {
+        return std::make_unique<AlphaReturnType>(std::make_pair(alpha_start, std::make_pair(u, 0.)));
       } else {
-        RangeFieldType tau_prime = std::min(
-            tau_ / ((1 + std::sqrt(dimRange) * u_prime.two_norm()) * density + std::sqrt(dimRange) * tau_), tau_);
-        // The hessian H is always symmetric and tridiagonal, so we only need to store the diagonal and subdiagonal
-        // elements
-        RangeType H_diag;
-        FieldVector<RangeFieldType, dimRange - 1> H_subdiag;
-
-        // calculate moment vector for isotropic distribution
-        RangeType u_iso = basis_functions_.u_iso();
-        RangeType v;
-        RangeType alpha_k = cache_iterator != cache_.end() ? cache_iterator->second : alpha_iso;
-        const auto& r_sequence = regularize ? r_sequence_ : std::vector<RangeFieldType>{0.};
-        const auto r_max = r_sequence.back();
-        for (const auto& r : r_sequence_) {
-          // regularize u
-          v = u_prime;
-          if (r > 0) {
-            alpha_k = alpha_iso;
-            RangeType r_times_u_iso(u_iso);
-            r_times_u_iso *= r;
-            v *= 1 - r;
-            v += r_times_u_iso;
-          }
-
-          // calculate f_0
-          RangeFieldType f_k = calculate_f(alpha_k, v);
-
-          int pure_newton = 0;
-          StateRangeType g_k, d_k, minus_g_k, u_alpha_prime;
-          for (size_t kk = 0; kk < k_max_; ++kk) {
-            // exit inner for loop to increase r if too many iterations are used
-            if (kk > k_0_ && r < r_max)
-              break;
-            // calculate gradient g
-            calculate_gradient(alpha_k, v, g_k);
-            // calculate Hessian H
-            calculate_hessian(alpha_k, M_, H_diag, H_subdiag);
-            // calculate descent direction d_k;
-            minus_g_k = g_k;
-            minus_g_k *= -1;
-            try {
-              d_k = minus_g_k;
-              XT::LA::solve_sym_tridiag_posdef(H_diag, H_subdiag, d_k);
-            } catch (const Dune::MathError&) {
-              if (r < r_max) {
-                break;
-              } else {
-                mutex_.unlock();
-                DUNE_THROW(Dune::MathError, "Failure to converge!");
-              }
-            }
-
-            const auto& alpha_tilde = alpha_k;
-            const auto u_alpha_tilde = g_k + v;
-            auto density_tilde = basis_functions_.density(u_alpha_tilde);
-            if (!(density_tilde > 0.) || std::isinf(density_tilde))
-              break;
-            const auto alpha_prime = alpha_tilde - alpha_iso * std::log(density_tilde);
-            calculate_u(alpha_prime, u_alpha_prime);
-            auto u_eps_diff = v - u_alpha_prime * (1 - epsilon_gamma_);
-            // checking realizability is cheap so we do not need the second stopping criterion
-            if (g_k.two_norm() < tau_prime && is_realizable(u_eps_diff)) {
-              ret->first = alpha_prime + alpha_iso * std::log(density);
-              ret->second = r;
-              cache_.insert(v, alpha_prime);
-              alpha_storage_[x_local] = ret->first;
-              goto outside_all_loops;
-            } else {
-              RangeFieldType zeta_k = 1;
-              // backtracking line search
-              while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm()) {
-                // while (pure_newton >= 2 || zeta_k > epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.) {
-                // calculate alpha_new = alpha_k + zeta_k d_k
-                auto alpha_new = d_k;
-                alpha_new *= zeta_k;
-                alpha_new += alpha_k;
-                // calculate f(alpha_new)
-                RangeFieldType f_new = calculate_f(alpha_new, v);
-                if (pure_newton >= 2 || XT::Common::FloatCmp::le(f_new, f_k + xi_ * zeta_k * (g_k * d_k))) {
-                  alpha_k = alpha_new;
-                  f_k = f_new;
-                  pure_newton = 0.;
-                  break;
-                }
-                zeta_k = chi_ * zeta_k;
-              } // backtracking linesearch while
-              // if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm() * 100.)
-              if (zeta_k <= epsilon_ * alpha_k.two_norm() / d_k.two_norm())
-                ++pure_newton;
-            } // else (stopping conditions)
-          } // k loop (Newton iterations)
-        } // r loop (Regularization parameter)
-        mutex_.unlock();
-        DUNE_THROW(MathError, "Failed to converge");
-      } // else ( value has not been calculated before )
-
-    outside_all_loops:
-      mutex_.unlock();
-      return ret;
-    } // ... get_alpha(...)
-
-    virtual size_t order(const XT::Common::Parameter& /*param*/) const override
-    {
-      return 1;
-    }
-
-    virtual void evaluate(const DomainType& x_local,
-                          const StateRangeType& u,
-                          RangeType& ret,
-                          const XT::Common::Parameter& param) const override
-    {
-      std::fill(ret.begin(), ret.end(), 0.);
-      const auto alpha = get_alpha(x_local, u, param, true)->first;
-      // calculate ret[ii] = < omega[ii] m G_\alpha(u) >
-      LocalVectorType local_alpha;
-      for (size_t jj = 0; jj < num_intervals; ++jj) {
-        for (size_t ii = 0; ii < 2; ++ii)
-          local_alpha[ii] = alpha[jj + ii];
-        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-          const auto& basis_ll = M_[jj][ll];
-          auto factor_ll = std::exp(local_alpha * basis_ll) * quad_points_[jj][ll] * quad_weights_[jj][ll];
-          for (size_t ii = 0; ii < 2; ++ii)
-            ret[jj + ii] += basis_ll[ii] * factor_ll;
-        } // ll (quad points)
-      } // jj (intervals)
-    } // void evaluate(...)
-
-    virtual void evaluate_col(const size_t DXTC_DEBUG_ONLY(col),
-                              const DomainType& x_local,
-                              const StateRangeType& u,
-                              ColRangeType& ret,
-                              const XT::Common::Parameter& param) const override
-    {
-      assert(col == 0);
-      evaluate(x_local, u, ret, param);
-    } // void evaluate_col(...)
-
-    virtual void partial_u(const DomainType& x_local,
-                           const StateRangeType& /*u*/,
-                           PartialURangeType& ret,
-                           const XT::Common::Parameter& /*param*/) const override
-    {
-      const auto alpha = get_stored_alpha(x_local);
-      RangeType H_diag, J_diag;
-      FieldVector<RangeFieldType, dimRange - 1> H_subdiag, J_subdiag;
-      calculate_hessian(alpha, M_, H_diag, H_subdiag);
-      calculate_J(M_, J_diag, J_subdiag);
-      calculate_J_Hinv(ret, J_diag, J_subdiag, H_diag, H_subdiag);
+        auto ret = implementation_.get_alpha(u, alpha_start, regularize);
+        entity_cache_->insert(ret->second.first, ret->first);
+        thread_cache_.insert(ret->second.first, ret->first);
+        return std::move(ret);
+      }
     }
 
-    virtual void partial_u_col(const size_t DXTC_DEBUG_ONLY(col),
-                               const DomainType& x_local,
-                               const StateRangeType& u,
-                               ColPartialURangeType& ret,
-                               const XT::Common::Parameter& param) const override
+    virtual RangeReturnType evaluate(const DomainType& /*point_in_reference_element*/,
+                                     const StateType& u,
+                                     const XT::Common::Parameter& /*param*/ = {}) const override final
     {
-      assert(col == 0);
-      partial_u(x_local, u, ret, param);
+      const auto alpha = get_alpha(u, true)->first;
+      return implementation_.evaluate_with_alpha(alpha);
     }
 
-    static std::string static_id()
+    virtual JacobianRangeReturnType jacobian(const DomainType& /*point_in_reference_element*/,
+                                             const StateType& u,
+                                             const XT::Common::Parameter& /*param*/ = {}) const override final
     {
-      return "gdt.entropybasedlocalflux";
+      const auto alpha = get_alpha(u, true)->first;
+      return implementation_.jacobian_with_alpha(alpha);
     }
 
   private:
-    RangeFieldType calculate_f(const StateRangeType& alpha, const StateRangeType& v) const
-    {
-      RangeFieldType ret(0.);
-      XT::Common::FieldVector<RangeFieldType, block_size> local_alpha;
-      for (size_t jj = 0; jj < num_intervals; ++jj) {
-        for (size_t ii = 0; ii < 2; ++ii)
-          local_alpha[ii] = alpha[jj + ii];
-        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll)
-          ret += std::exp(local_alpha * M_[jj][ll]) * quad_weights_[jj][ll];
-      } // jj (intervals)
-      ret -= alpha * v;
-      return ret;
-    } // void calculate_u(...)
-
-    void calculate_u(const StateRangeType& alpha, StateRangeType& u) const
-    {
-      std::fill(u.begin(), u.end(), 0.);
-      LocalVectorType local_alpha;
-      for (size_t jj = 0; jj < num_intervals; ++jj) {
-        for (size_t ii = 0; ii < 2; ++ii)
-          local_alpha[ii] = alpha[jj + ii];
-        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-          const auto& basis_ll = M_[jj][ll];
-          auto factor_ll = std::exp(local_alpha * basis_ll) * quad_weights_[jj][ll];
-          for (size_t ii = 0; ii < 2; ++ii)
-            u[jj + ii] += basis_ll[ii] * factor_ll;
-        } // ll (quad points)
-      } // jj (intervals)
-    } // void calculate_u(...)
-
-    void calculate_gradient(const StateRangeType& alpha, const StateRangeType& v, StateRangeType& g_k) const
-    {
-      calculate_u(alpha, g_k);
-      g_k -= v;
-    }
+    const IndexSetType& index_set_;
+    mutable LocalCacheType thread_cache_;
+    std::vector<LocalCacheType>& entity_caches_;
+    std::vector<std::mutex>& mutexes_;
+    const ImplementationType& implementation_;
+    LocalCacheType* entity_cache_;
+    std::mutex* mutex_;
+  }; // class Localfunction
 
-    void calculate_hessian(const StateRangeType& alpha,
-                           const BasisValuesMatrixType& M,
-                           StateRangeType& H_diag,
-                           FieldVector<RangeFieldType, dimRange - 1>& H_subdiag) const
-    {
-      std::fill(H_diag.begin(), H_diag.end(), 0.);
-      std::fill(H_subdiag.begin(), H_subdiag.end(), 0.);
-      LocalVectorType local_alpha;
-      auto& work_vecs = working_storage();
-      for (size_t jj = 0; jj < num_intervals; ++jj) {
-        for (size_t ii = 0; ii < 2; ++ii)
-          local_alpha[ii] = alpha[jj + ii];
-        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-          const auto& basis_ll = M[jj][ll];
-          work_vecs[jj][ll] = std::exp(local_alpha * basis_ll) * quad_weights_[jj][ll];
-          for (size_t ii = 0; ii < 2; ++ii)
-            H_diag[jj + ii] += std::pow(basis_ll[ii], 2) * work_vecs[jj][ll];
-          H_subdiag[jj] += basis_ll[0] * basis_ll[1] * work_vecs[jj][ll];
-        } // ll (quad points)
-      } // jj (intervals)
-    } // void calculate_hessian(...)
-
-    // J = df/dalpha is the derivative of the flux with respect to alpha.
-    // As F = (f_1, f_2, f_3) is matrix-valued
-    // (div f = \sum_{i=1}^d \partial_{x_i} f_i  = \sum_{i=1}^d \partial_{x_i} < v_i m \hat{psi}(alpha) > is
-    // vector-valued),
-    // the derivative is the vector of matrices (df_1/dalpha, df_2/dalpha, ...)
-    // this function returns the dd-th matrix df_dd/dalpha of J
-    // assumes work_vecs already contains the needed exp(alpha * m) values
-    void calculate_J(const BasisValuesMatrixType& M,
-                     StateRangeType& J_diag,
-                     FieldVector<RangeFieldType, dimRange - 1>& J_subdiag) const
-    {
-      std::fill(J_diag.begin(), J_diag.end(), 0.);
-      std::fill(J_subdiag.begin(), J_subdiag.end(), 0.);
-      const auto& work_vecs = working_storage();
-      for (size_t jj = 0; jj < num_intervals; ++jj) {
-        for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-          const auto& basis_ll = M[jj][ll];
-          for (size_t ii = 0; ii < 2; ++ii)
-            J_diag[jj + ii] += std::pow(basis_ll[ii], 2) * work_vecs[jj][ll] * quad_points_[jj][ll];
-          J_subdiag[jj] += basis_ll[0] * basis_ll[1] * work_vecs[jj][ll] * quad_points_[jj][ll];
-        } // ll (quad points)
-      } // jj (intervals)
-    } // void calculate_J(...)
-
-    // calculates ret = J H^{-1}. Both J and H are symmetric tridiagonal, H is positive definite.
-    static void calculate_J_Hinv(MatrixType& ret,
-                                 const StateRangeType& J_diag,
-                                 const FieldVector<RangeFieldType, dimRange - 1>& J_subdiag,
-                                 StateRangeType& H_diag,
-                                 FieldVector<RangeFieldType, dimRange - 1>& H_subdiag)
-    {
-      // factorize H = LDL^T, where L is unit lower bidiagonal and D is diagonal
-      // H_diag is overwritten by the diagonal elements of D
-      // H_subdiag is overwritten by the subdiagonal elements of L
-      XT::LA::tridiagonal_ldlt(H_diag, H_subdiag);
-
-      // copy J to dense matrix
-      std::fill(ret.begin(), ret.end(), 0.);
-      for (size_t ii = 0; ii < dimRange - 1; ++ii) {
-        ret[ii][ii] = J_diag[ii];
-        ret[ii + 1][ii] = J_subdiag[ii];
-        ret[ii][ii + 1] = J_subdiag[ii];
-      }
-      ret[dimRange - 1][dimRange - 1] = J_diag[dimRange - 1];
-
-      // Solve ret H = J which is equivalent to (as H and J are symmetric) to H ret^T = J;
-      XT::LA::solve_tridiagonal_ldlt_factorized(H_diag, H_subdiag, ret);
-      // transpose ret
-      for (size_t ii = 0; ii < dimRange; ++ii)
-        for (size_t jj = 0; jj < ii; ++jj)
-          std::swap(ret[jj][ii], ret[ii][jj]);
-    } // void calculate_J_Hinv(...)
-
-    const BasisfunctionType& basis_functions_;
-    const QuadraturePointsType& quad_points_;
-    const QuadratureWeightsType& quad_weights_;
-    const std::vector<RangeFieldType>& grid_points_;
-    const BasisValuesMatrixType& M_;
-    const RangeFieldType tau_;
-    const RangeFieldType epsilon_gamma_;
-    const RangeFieldType chi_;
-    const RangeFieldType xi_;
-    const std::vector<RangeFieldType>& r_sequence_;
-    const size_t k_0_;
-    const size_t k_max_;
-    const RangeFieldType epsilon_;
-    const std::string name_;
-    LocalCacheType& cache_;
-    AlphaStorageType& alpha_storage_;
-    std::mutex& mutex_;
-  }; // class Localfunction>
-
-  static std::string static_id()
-  {
-    return "gdt.entropybasedflux";
+  virtual bool x_dependent() const override final
+  {
+    return false;
   }
 
-  std::unique_ptr<LocalfunctionType> local_function(const EntityType& entity) const
+  virtual std::unique_ptr<LocalFunctionType> local_function() const override final
   {
-    return derived_local_function(entity);
+    return std::make_unique<Localfunction>(index_set_, entity_caches_, mutexes_, implementation_);
   }
 
-  std::unique_ptr<Localfunction> derived_local_function(const EntityType& entity) const
-  {
-    const auto& index = index_set_.index(entity);
-    return std::make_unique<Localfunction>(entity,
-                                           basis_functions_,
-                                           quad_points_,
-                                           quad_weights_,
-                                           grid_points_,
-                                           M_,
-                                           tau_,
-                                           epsilon_gamma_,
-                                           chi_,
-                                           xi_,
-                                           r_sequence_,
-                                           k_0_,
-                                           k_max_,
-                                           epsilon_,
-                                           cache_[index],
-                                           alpha_storage_[index],
-                                           mutexes_[index]);
+  virtual std::unique_ptr<Localfunction> derived_local_function() const
+  {
+    return std::make_unique<Localfunction>(index_set_, entity_caches_, mutexes_, implementation_);
   }
 
-  // calculate \sum_{i=1}^d < v_i_+ m \psi >_+ n_i, where n is the unit outer normal,
-  // m is the basis function vector, phi_u is the ansatz corresponding to u
-  // and x, v, t are the space, velocity and time variable, respectively
-  // As we are using cartesian grids, n_i == 0 in all but one dimension, so only evaluate for i == dd
-  StateRangeType evaluate_kinetic_flux(const EntityType& entity,
-                                       const DomainType& x_local_entity,
-                                       const StateRangeType& /*u_i*/,
-                                       const EntityType& neighbor,
-                                       const DomainType& x_local_neighbor,
-                                       const StateRangeType u_j,
-                                       const DomainType& n_ij,
-                                       const size_t DXTC_DEBUG_ONLY(dd),
-                                       const XT::Common::Parameter& /*param*/,
-                                       const XT::Common::Parameter& param_neighbor) const
+  StateType evaluate_kinetic_flux(const E& inside_entity,
+                                  const E& outside_entity,
+                                  const StateType& u_i,
+                                  const StateType& u_j,
+                                  const DomainType& n_ij,
+                                  const size_t dd) const
   {
-    assert(dd == 0);
-    assert(XT::Common::FloatCmp::ne(n_ij[dd], 0.));
-    const bool boundary = static_cast<bool>(param_neighbor.get("boundary")[0]);
-    // calculate < \mu m G_\alpha(u) > * n_ij
-    const auto local_function_entity = derived_local_function(entity);
-    const auto local_function_neighbor = derived_local_function(neighbor);
-    const auto alpha_i = local_function_entity->get_stored_alpha(x_local_entity);
-    StateRangeType alpha_j;
-    if (boundary)
-      alpha_j = local_function_neighbor->get_alpha(x_local_neighbor, u_j, param_neighbor, true)->first;
-    else
-      alpha_j = local_function_neighbor->get_stored_alpha(x_local_neighbor);
-    StateRangeType ret(0);
-    LocalVectorType local_alpha_i, local_alpha_j;
-    for (size_t jj = 0; jj < num_intervals; ++jj) {
-      for (size_t ii = 0; ii < 2; ++ii) {
-        local_alpha_i[ii] = alpha_i[jj + ii];
-        local_alpha_j[ii] = alpha_j[jj + ii];
-      }
-      for (size_t ll = 0; ll < quad_weights_[jj].size(); ++ll) {
-        const auto& basis_ll = M_[jj][ll];
-        const auto position = quad_points_[jj][ll];
-        RangeFieldType factor =
-            position * n_ij[0] > 0. ? std::exp(local_alpha_i * basis_ll) : std::exp(local_alpha_j * basis_ll);
-        factor *= quad_weights_[jj][ll] * position;
-        for (size_t ii = 0; ii < 2; ++ii)
-          ret[jj + ii] += basis_ll[ii] * factor;
-      } // ll (quad points)
-    } // jj (intervals)
-    ret *= n_ij[0];
-    return ret;
-  } // StateRangeType evaluate_kinetic_flux(...)
+    // calculate \sum_{i=1}^d < \omega_i m G_\alpha(u) > n_i
+    const auto local_func = derived_local_function();
+    local_func->bind(inside_entity);
+    const auto alpha_i = local_func->get_alpha(u_i, true)->first;
+    local_func->bind(outside_entity);
+    const auto alpha_j = local_func->get_alpha(u_j, true)->first;
+    return implementation_.evaluate_kinetic_flux_with_alphas(alpha_i, alpha_j, n_ij, dd);
+  } // StateType evaluate_kinetic_flux(...)
 
-  const BasisfunctionType& basis_functions() const
+  const MomentBasis& basis_functions() const
   {
-    return basis_functions_;
+    return implementation_.basis_functions();
   }
 
 private:
-  const typename GridLayerType::IndexSet& index_set_;
-  const BasisfunctionType& basis_functions_;
-  QuadraturePointsType quad_points_;
-  QuadratureWeightsType quad_weights_;
-  const std::vector<RangeFieldType>& grid_points_;
-  BasisValuesMatrixType M_;
-  const RangeFieldType tau_;
-  const RangeFieldType epsilon_gamma_;
-  const RangeFieldType chi_;
-  const RangeFieldType xi_;
-  const std::vector<RangeFieldType> r_sequence_;
-  const size_t k_0_;
-  const size_t k_max_;
-  const RangeFieldType epsilon_;
-  const std::string name_;
-  // Use unique_ptr in the vectors to avoid the memory cost for storing twice as many matrices or vectors as needed
-  // (see constructor)
-  mutable std::vector<LocalCacheType> cache_;
-  mutable std::vector<AlphaStorageType> alpha_storage_;
+  const IndexSetType& index_set_;
+  mutable std::vector<LocalCacheType> entity_caches_;
   mutable std::vector<std::mutex> mutexes_;
+  ImplementationType implementation_;
 };
-#  endif
-#endif
+
+template <class GridViewImp, class MomentBasisImp>
+const size_t EntropyBasedFluxFunction<GridViewImp, MomentBasisImp>::cache_size;
 
 
 } // namespace GDT
diff --git a/dune/gdt/momentmodels/entropysolver.hh b/dune/gdt/momentmodels/entropysolver.hh
index 32f22ca02..676d5fa97 100644
--- a/dune/gdt/momentmodels/entropysolver.hh
+++ b/dune/gdt/momentmodels/entropysolver.hh
@@ -24,13 +24,13 @@ namespace Dune {
 namespace GDT {
 
 
-template <class SpaceType, class VectorType, class BasisfunctionType>
+template <class SpaceType, class VectorType, class MomentBasis>
 class LocalEntropySolver : public XT::Grid::ElementFunctor<typename SpaceType::GridViewType>
 {
   using GridViewType = typename SpaceType::GridViewType;
   using EntityType = typename GridViewType::template Codim<0>::Entity;
   using IndexSetType = typename GridViewType::IndexSet;
-  using EntropyFluxType = EntropyBasedFluxFunction<GridViewType, BasisfunctionType>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GridViewType, MomentBasis>;
   using RangeFieldType = typename EntropyFluxType::RangeFieldType;
   using LocalVectorType = typename EntropyFluxType::VectorType;
   static const size_t dimDomain = EntropyFluxType::basis_dimDomain;
@@ -113,22 +113,21 @@ private:
 }; // class LocalEntropySolver<...>
 
 
-template <class BasisfunctionImp,
+template <class MomentBasisImp,
           class SpaceImp,
-          class MatrixType = typename XT::LA::Container<typename BasisfunctionImp::RangeFieldType>::MatrixType>
-class EntropySolver
-  : public OperatorInterface<MatrixType, typename SpaceImp::GridViewType, BasisfunctionImp::dimRange, 1>
+          class MatrixType = typename XT::LA::Container<typename MomentBasisImp::RangeFieldType>::MatrixType>
+class EntropySolver : public OperatorInterface<MatrixType, typename SpaceImp::GridViewType, MomentBasisImp::dimRange, 1>
 {
-  using BaseType = OperatorInterface<MatrixType, typename SpaceImp::GridViewType, BasisfunctionImp::dimRange, 1>;
+  using BaseType = OperatorInterface<MatrixType, typename SpaceImp::GridViewType, MomentBasisImp::dimRange, 1>;
 
 public:
   using typename BaseType::VectorType;
-  using BasisfunctionType = BasisfunctionImp;
+  using MomentBasis = MomentBasisImp;
   using SpaceType = SpaceImp;
   using SourceSpaceType = SpaceImp;
   using RangeSpaceType = SpaceImp;
-  using EntropyFluxType = EntropyBasedFluxFunction<typename SpaceType::GridViewType, BasisfunctionType>;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
+  using EntropyFluxType = EntropyBasedFluxFunction<typename SpaceType::GridViewType, MomentBasis>;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
   using LocalVectorType = typename EntropyFluxType::VectorType;
 
   EntropySolver(const EntropyFluxType& analytical_flux,
@@ -158,7 +157,7 @@ public:
 
   void apply(const VectorType& source, VectorType& range, const XT::Common::Parameter& param) const override final
   {
-    LocalEntropySolver<SpaceType, VectorType, BasisfunctionType> local_entropy_solver(
+    LocalEntropySolver<SpaceType, VectorType, MomentBasis> local_entropy_solver(
         space_, source, range, analytical_flux_, min_acceptable_density_, param, filename_);
     auto walker = XT::Grid::Walker<typename SpaceType::GridViewType>(space_.grid_view());
     walker.append(local_entropy_solver);
diff --git a/dune/gdt/operators/entropybasedmoments_fv.hh b/dune/gdt/operators/advection-fv-entropybased.hh
similarity index 100%
rename from dune/gdt/operators/entropybasedmoments_fv.hh
rename to dune/gdt/operators/advection-fv-entropybased.hh
diff --git a/dune/gdt/operators/reconstruction/slopes.hh b/dune/gdt/operators/reconstruction/slopes.hh
index a24d69622..614fbc579 100644
--- a/dune/gdt/operators/reconstruction/slopes.hh
+++ b/dune/gdt/operators/reconstruction/slopes.hh
@@ -303,12 +303,12 @@ public:
 }; // class SuperbeeSlope
 
 
-template <class GV, class BasisfunctionType>
+template <class GV, class MomentBasis>
 class RealizabilityLimiterBase
 {
 public:
   using E = XT::Grid::extract_entity_t<GV>;
-  using EntropyFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
   using StateType = typename EntropyFluxType::StateType;
 
   RealizabilityLimiterBase(const EntropyFluxType& entropy_flux)
@@ -346,17 +346,17 @@ private:
 // Realizability limiter that ensures positivity of the components of u in noncharacteristic variables. Uses single
 // limiter variable for all components.
 template <class GV,
-          class BasisfunctionType,
+          class MomentBasis,
           class EigenVectorWrapperType,
           class SlopeType = MinmodSlope<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>>
 class PositivityLimitedSlope
   : public SlopeBase<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType, 3>
-  , public RealizabilityLimiterBase<GV, BasisfunctionType>
+  , public RealizabilityLimiterBase<GV, MomentBasis>
 {
   using ThisType = PositivityLimitedSlope;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
-  static const size_t dimRange = BasisfunctionType::dimRange;
-  using RealizabilityBaseType = RealizabilityLimiterBase<GV, BasisfunctionType>;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
+  static const size_t dimRange = MomentBasis::dimRange;
+  using RealizabilityBaseType = RealizabilityLimiterBase<GV, MomentBasis>;
   using typename RealizabilityBaseType::E;
   using typename RealizabilityBaseType::EntropyFluxType;
   using BaseType = SlopeBase<E, EigenVectorWrapperType, 3>;
@@ -432,8 +432,8 @@ class Dg1dRealizabilityLimitedSlope
   , public RealizabilityLimiterBase<GV, PartialMomentBasis<typename GV::ctype, 1, RangeFieldType, dimRange, 1, 1>>
 {
   using ThisType = Dg1dRealizabilityLimitedSlope;
-  using BasisfunctionType = Dune::GDT::PartialMomentBasis<RangeFieldType, 1, RangeFieldType, dimRange, 1, 1, 1>;
-  using RealizabilityBaseType = RealizabilityLimiterBase<GV, BasisfunctionType>;
+  using MomentBasis = Dune::GDT::PartialMomentBasis<RangeFieldType, 1, RangeFieldType, dimRange, 1, 1, 1>;
+  using RealizabilityBaseType = RealizabilityLimiterBase<GV, MomentBasis>;
   using typename RealizabilityBaseType::E;
   using typename RealizabilityBaseType::EntropyFluxType;
   using BaseType = SlopeBase<E, EigenVectorWrapperType, 3>;
@@ -444,7 +444,7 @@ public:
 
   // This limiter ensures u_i >= epsilon for all components u_i of u.
   Dg1dRealizabilityLimitedSlope(const EntropyFluxType& entropy_flux,
-                                const BasisfunctionType& basis_functions,
+                                const MomentBasis& basis_functions,
                                 const RangeFieldType epsilon = 0.)
     : RealizabilityBaseType(entropy_flux)
     , basis_functions_(basis_functions)
@@ -513,7 +513,7 @@ private:
     return ret;
   }
 
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
   const RangeFieldType epsilon_;
   const SlopeType slope_limiter_;
 }; // class Dg1dRealizabilityLimitedSlope<...>
@@ -523,17 +523,17 @@ private:
 // Realizability limiter that ensures that the limited values are within the convex hull of the quadrature points. Uses
 // single limiter variable for all components.
 template <class GV,
-          class BasisfunctionType,
+          class MomentBasis,
           class EigenVectorWrapperType,
           class SlopeType = MinmodSlope<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>>
 class ConvexHullRealizabilityLimitedSlope
   : public SlopeBase<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType, 3>
-  , public RealizabilityLimiterBase<GV, BasisfunctionType>
+  , public RealizabilityLimiterBase<GV, MomentBasis>
 {
   using ThisType = ConvexHullRealizabilityLimitedSlope;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
-  static const size_t dimRange = BasisfunctionType::dimRange;
-  using RealizabilityBaseType = RealizabilityLimiterBase<GV, BasisfunctionType>;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
+  static const size_t dimRange = MomentBasis::dimRange;
+  using RealizabilityBaseType = RealizabilityLimiterBase<GV, MomentBasis>;
   using typename RealizabilityBaseType::E;
   using typename RealizabilityBaseType::EntropyFluxType;
   using BaseType = SlopeBase<E, EigenVectorWrapperType, 3>;
@@ -544,7 +544,7 @@ public:
   using typename BaseType::StencilType;
 
   ConvexHullRealizabilityLimitedSlope(const EntropyFluxType& entropy_flux,
-                                      const BasisfunctionType& basis_functions,
+                                      const MomentBasis& basis_functions,
                                       const RangeFieldType epsilon = 0.)
     : RealizabilityBaseType(entropy_flux)
     , basis_functions_(basis_functions)
@@ -622,7 +622,7 @@ private:
     }
   } // void calculate_plane_coefficients()
 
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
   const RangeFieldType epsilon_;
   const SlopeType slope_limiter_;
   PlaneCoefficientsType plane_coefficients_;
@@ -632,20 +632,20 @@ private:
 // Realizability limiter that ensures that the limited values are within the convex hull of the quadrature points. Uses
 // single limiter variable for all components.
 template <class GV,
-          class BasisfunctionType,
+          class MomentBasis,
           class EigenVectorWrapperType,
 
           class SlopeType = MinmodSlope<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>>
 class DgConvexHullRealizabilityLimitedSlope
   : public SlopeBase<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType, 3>
-  , public RealizabilityLimiterBase<GV, BasisfunctionType>
+  , public RealizabilityLimiterBase<GV, MomentBasis>
 {
   using ThisType = DgConvexHullRealizabilityLimitedSlope;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
-  static const size_t dimRange = BasisfunctionType::dimRange;
-  static const size_t block_size = (BasisfunctionType::dimDomain == 1) ? 2 : 4;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
+  static const size_t dimRange = MomentBasis::dimRange;
+  static const size_t block_size = (MomentBasis::dimDomain == 1) ? 2 : 4;
   static const size_t num_blocks = dimRange / block_size;
-  using RealizabilityBaseType = RealizabilityLimiterBase<GV, BasisfunctionType>;
+  using RealizabilityBaseType = RealizabilityLimiterBase<GV, MomentBasis>;
   using typename RealizabilityBaseType::E;
   using typename RealizabilityBaseType::EntropyFluxType;
   using BaseType = SlopeBase<E, EigenVectorWrapperType, 3>;
@@ -658,7 +658,7 @@ public:
   using typename BaseType::StencilType;
 
   DgConvexHullRealizabilityLimitedSlope(const EntropyFluxType& entropy_flux,
-                                        const BasisfunctionType& basis_functions,
+                                        const MomentBasis& basis_functions,
                                         const RangeFieldType epsilon = 0.)
     : RealizabilityBaseType(entropy_flux)
     , basis_functions_(basis_functions)
@@ -734,7 +734,7 @@ private:
     return theta;
   } // ... get_block_theta(...)
 
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
   const RangeFieldType epsilon_;
   const SlopeType slope_limiter_;
   PlaneCoefficientsType plane_coefficients_;
@@ -743,27 +743,25 @@ private:
 #else // HAVE_QHULL
 
 template <class GV,
-          class BasisfunctionType,
-
-          class SlopeType =
-              MinmodSlope<XT::Grid::extract_entity_t<GV>,
-                          FieldVector<typename BasisfunctionType::RangeFieldType, BasisfunctionType::dimRange>,
-                          EigenVectorWrapperType>>
+          class MomentBasis,
+          class EigenVectorWrapperType,
+          class SlopeType = MinmodSlope<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>>
 class ConvexHullRealizabilityLimitedSlope
 {
+  // Placeholder used when Qhull is unavailable: the template head matches the Qhull-backed class so that any
+  // instantiation reaches the static_assert below instead of failing on a template argument mismatch.
-  static_assert(Dune::AlwaysFalse<BasisfunctionType>::value, "You are missing Qhull!");
+  static_assert(Dune::AlwaysFalse<MomentBasis>::value, "You are missing Qhull!");
 };
 
 template <class GV,
-          class BasisfunctionType,
-
-          class SlopeType =
-              MinmodSlope<XT::Grid::extract_entity_t<GV>,
-                          FieldVector<typename BasisfunctionType::RangeFieldType, BasisfunctionType::dimRange>,
-                          EigenVectorWrapperType>>
+          class MomentBasis,
+          class EigenVectorWrapperType,
+          class SlopeType = MinmodSlope<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>>
-class DgConvexHullRealizabilityLimitedSlopeSlope
+class DgConvexHullRealizabilityLimitedSlope
 {
+  // Placeholder used when Qhull is unavailable. Name and template head match the Qhull-backed class so that any
+  // instantiation reaches the static_assert below rather than failing on an unknown or mismatched template.
-  static_assert(Dune::AlwaysFalse<BasisfunctionType>::value, "You are missing Qhull!");
+  static_assert(Dune::AlwaysFalse<MomentBasis>::value, "You are missing Qhull!");
 };
 #endif // HAVE_QHULL
 
@@ -771,17 +769,17 @@ class DgConvexHullRealizabilityLimitedSlopeSlope
 // Characteristic component-wise realizability limiter that ensures positivity of the components of u in
 // noncharacteristic variables by solving a linear program.
 template <class GV,
-          class BasisfunctionType,
+          class MomentBasis,
           class EigenVectorWrapperType,
           class SlopeType = MinmodSlope<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>>
 class LpPositivityLimitedSlope
   : public SlopeBase<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType, 3>
-  , public RealizabilityLimiterBase<GV, BasisfunctionType>
+  , public RealizabilityLimiterBase<GV, MomentBasis>
 {
   using ThisType = LpPositivityLimitedSlope;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
-  static const size_t dimRange = BasisfunctionType::dimRange;
-  using RealizabilityBaseType = RealizabilityLimiterBase<GV, BasisfunctionType>;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
+  static const size_t dimRange = MomentBasis::dimRange;
+  using RealizabilityBaseType = RealizabilityLimiterBase<GV, MomentBasis>;
   using typename RealizabilityBaseType::E;
   using typename RealizabilityBaseType::EntropyFluxType;
   using BaseType = SlopeBase<E, EigenVectorWrapperType, 3>;
@@ -910,19 +908,19 @@ private:
 // Realizability limiter that solves a linear program to ensure the reconstructed values are still in the numerically
 // realizable set, i.e. in the convex hull of basis evaluations.
 template <class GV,
-          class BasisfunctionType,
+          class MomentBasis,
           class EigenVectorWrapperType,
           class SlopeType = MinmodSlope<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>>
 class LpConvexhullRealizabilityLimitedSlope
   : public SlopeBase<XT::Grid::extract_entity_t<GV>, EigenVectorWrapperType>
-  , public RealizabilityLimiterBase<GV, BasisfunctionType>
+  , public RealizabilityLimiterBase<GV, MomentBasis>
 {
   using ThisType = LpConvexhullRealizabilityLimitedSlope;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
-  static constexpr size_t dimRange = BasisfunctionType::dimRange;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
+  static constexpr size_t dimRange = MomentBasis::dimRange;
   static_assert(dimRange < std::numeric_limits<int>::max(), "");
   static constexpr size_t num_rows = dimRange;
-  using RealizabilityBaseType = RealizabilityLimiterBase<GV, BasisfunctionType>;
+  using RealizabilityBaseType = RealizabilityLimiterBase<GV, MomentBasis>;
   using typename RealizabilityBaseType::E;
   using typename RealizabilityBaseType::EntropyFluxType;
   using BaseType = SlopeBase<E, EigenVectorWrapperType, 3>;
@@ -933,7 +931,7 @@ public:
   using typename BaseType::StencilType;
 
   LpConvexhullRealizabilityLimitedSlope(const EntropyFluxType& entropy_flux,
-                                        const BasisfunctionType& basis_functions,
+                                        const MomentBasis& basis_functions,
                                         const RangeFieldType epsilon)
     : RealizabilityBaseType(entropy_flux)
     , epsilon_(epsilon)
@@ -1104,7 +1102,7 @@ private:
   }
 
   const RangeFieldType epsilon_;
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
   std::shared_ptr<std::vector<VectorType>> basis_values_;
   mutable std::unique_ptr<ClpSimplex> lp_;
   mutable std::unique_ptr<MatrixType> A_tilde_transposed_;
diff --git a/dune/gdt/test/mn-discretization.hh b/dune/gdt/test/mn-discretization.hh
index e19f4476a..a3ad07290 100644
--- a/dune/gdt/test/mn-discretization.hh
+++ b/dune/gdt/test/mn-discretization.hh
@@ -20,16 +20,16 @@
 #include <dune/xt/la/container.hh>
 
 #include <dune/gdt/discretefunction/default.hh>
-#include <dune/gdt/operators/entropybasedmoments_fv.hh>
+#include <dune/gdt/operators/advection-fv-entropybased.hh>
 #include <dune/gdt/operators/advection-fv.hh>
 #include <dune/gdt/interpolations/default.hh>
 #include <dune/gdt/momentmodels/entropysolver.hh>
 #include <dune/gdt/local/numerical-fluxes/kinetic.hh>
 #include <dune/gdt/local/operators/advection-fv.hh>
 #include <dune/gdt/spaces/l2/finite-volume.hh>
-#include <dune/gdt/timestepper/fractional-step.hh>
-#include <dune/gdt/timestepper/explicit-rungekutta.hh>
-#include <dune/gdt/timestepper/matrix-exponential-kinetic-isotropic.hh>
+#include <dune/gdt/tools/timestepper/fractional-step.hh>
+#include <dune/gdt/tools/timestepper/explicit-rungekutta.hh>
+#include <dune/gdt/tools/timestepper/matrix-exponential-kinetic-isotropic.hh>
 
 #include <dune/gdt/test/momentmodels/kineticequation.hh>
 
@@ -53,7 +53,7 @@ struct HyperbolicMnDiscretization
     using namespace Dune::GDT;
 
     //******************* get typedefs and constants from ProblemType **********************//
-    using BasisfunctionType = typename TestCaseType::BasisfunctionType;
+    using MomentBasis = typename TestCaseType::MomentBasis;
     using DiscreteFunctionType = typename TestCaseType::DiscreteFunctionType;
     using GridType = typename TestCaseType::GridType;
     using SpaceType = typename TestCaseType::SpaceType;
@@ -61,10 +61,10 @@ struct HyperbolicMnDiscretization
     using GV = typename TestCaseType::GridViewType;
     using I = XT::Grid::extract_intersection_t<GV>;
     using ProblemType = typename TestCaseType::ProblemType;
-    using RangeFieldType = typename BasisfunctionType::RangeFieldType;
+    using RangeFieldType = typename MomentBasis::RangeFieldType;
     using BoundaryValueType = typename ProblemType::BoundaryValueType;
-    static constexpr size_t dimDomain = BasisfunctionType::dimDomain;
-    static constexpr size_t dimRange = BasisfunctionType::dimRange;
+    static constexpr size_t dimDomain = MomentBasis::dimDomain;
+    static constexpr size_t dimRange = MomentBasis::dimRange;
     using MatrixType = typename XT::LA::Container<RangeFieldType>::MatrixType;
     using VectorType = typename XT::LA::Container<RangeFieldType>::VectorType;
 
@@ -81,8 +81,8 @@ struct HyperbolicMnDiscretization
     const AdvectionSourceSpaceType advection_source_space(grid_view, 1);
 
     //******************* create EquationType object ***************************************
-    std::shared_ptr<const BasisfunctionType> basis_functions =
-        std::make_shared<const BasisfunctionType>(quad_order, num_quad_refinements);
+    std::shared_ptr<const MomentBasis> basis_functions =
+        std::make_shared<const MomentBasis>(quad_order, num_quad_refinements);
     const std::unique_ptr<ProblemType> problem_ptr =
         XT::Common::make_unique<ProblemType>(*basis_functions, grid_view, grid_config);
     const auto& problem = *problem_ptr;
@@ -102,8 +102,8 @@ struct HyperbolicMnDiscretization
     // ******************** choose flux and rhs operator and timestepper ******************************************
 
     using AdvectionOperatorType = AdvectionFvOperator<MatrixType, GV, dimRange>;
-    using EigenvectorWrapperType = typename EigenvectorWrapperChooser<BasisfunctionType, AnalyticalFluxType>::type;
-    using EntropySolverType = EntropySolver<BasisfunctionType, SpaceType>;
+    using EigenvectorWrapperType = typename EigenvectorWrapperChooser<MomentBasis, AnalyticalFluxType>::type;
+    using EntropySolverType = EntropySolver<MomentBasis, SpaceType>;
     using ReconstructionOperatorType =
         LinearReconstructionOperator<AnalyticalFluxType, BoundaryValueType, GV, MatrixType, EigenvectorWrapperType>;
     using ReconstructionAdvectionOperatorType =
@@ -115,7 +115,7 @@ struct HyperbolicMnDiscretization
         ExplicitRungeKuttaTimeStepper<FvOperatorType,
                                       DiscreteFunctionType,
                                       TimeStepperMethods::explicit_rungekutta_second_order_ssp>;
-    using RhsTimeStepperType = KineticIsotropicTimeStepper<DiscreteFunctionType, BasisfunctionType>;
+    using RhsTimeStepperType = KineticIsotropicTimeStepper<DiscreteFunctionType, MomentBasis>;
     using TimeStepperType = StrangSplittingTimeStepper<RhsTimeStepperType, OperatorTimeStepperType>;
 
     // *************** Calculate dx and initial dt **************************************
@@ -128,7 +128,7 @@ struct HyperbolicMnDiscretization
     RangeFieldType dt = CFL * dx;
 
     // *********************** create operators and timesteppers ************************************
-    NumericalKineticFlux<GV, BasisfunctionType> numerical_flux(*analytical_flux, *basis_functions);
+    NumericalKineticFlux<GV, MomentBasis> numerical_flux(*analytical_flux, *basis_functions);
     AdvectionOperatorType advection_operator(grid_view, numerical_flux, advection_source_space, fv_space);
 
     // boundary treatment
diff --git a/dune/gdt/test/momentmodels/kineticequation.hh b/dune/gdt/test/momentmodels/kineticequation.hh
index 049d5d8be..c866e79fe 100644
--- a/dune/gdt/test/momentmodels/kineticequation.hh
+++ b/dune/gdt/test/momentmodels/kineticequation.hh
@@ -20,19 +20,19 @@ namespace Dune {
 namespace GDT {
 
 
-template <class E, class BasisfunctionImp>
+template <class E, class MomentBasisImp>
 class KineticEquationInterface
 {
   using ThisType = KineticEquationInterface;
 
 public:
-  using BasisfunctionType = BasisfunctionImp;
-  using DomainFieldType = typename BasisfunctionType::DomainFieldType;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
-  static const size_t dimDomain = BasisfunctionType::dimDomain;
-  static const size_t dimRange = BasisfunctionType::dimRange;
-  static const size_t dimRangeCols = BasisfunctionType::dimRangeCols;
-  static const size_t dimFlux = BasisfunctionType::dimFlux;
+  using MomentBasis = MomentBasisImp;
+  using DomainFieldType = typename MomentBasis::DomainFieldType;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
+  static const size_t dimDomain = MomentBasis::dimDomain;
+  static const size_t dimRange = MomentBasis::dimRange;
+  static const size_t dimRangeCols = MomentBasis::dimRangeCols;
+  static const size_t dimFlux = MomentBasis::dimFlux;
   using FluxType = XT::Functions::FluxFunctionInterface<E, dimRange, dimDomain, dimRange, RangeFieldType>;
   using GenericFluxFunctionType = XT::Functions::GenericFluxFunction<E, dimRange, dimDomain, dimRange, RangeFieldType>;
   using InitialValueType = XT::Functions::FunctionInterface<dimDomain, dimRange, 1, RangeFieldType>;
@@ -45,7 +45,7 @@ public:
   using RangeReturnType = typename InitialValueType::RangeReturnType;
   using DynamicRangeType = Dune::DynamicVector<RangeFieldType>;
 
-  KineticEquationInterface(const BasisfunctionType& basis_functions)
+  KineticEquationInterface(const MomentBasis& basis_functions)
     : basis_functions_(basis_functions)
   {}
 
@@ -95,7 +95,7 @@ public:
   }
 
 protected:
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
 }; // class KineticEquationInterface<E, ...>
 
 
diff --git a/dune/gdt/test/momentmodels/kinetictransport/base.hh b/dune/gdt/test/momentmodels/kinetictransport/base.hh
index 8fe4cbc4a..871c53f9c 100644
--- a/dune/gdt/test/momentmodels/kinetictransport/base.hh
+++ b/dune/gdt/test/momentmodels/kinetictransport/base.hh
@@ -27,23 +27,23 @@ namespace Dune {
 namespace GDT {
 
 
-template <class E, class BasisfunctionImp>
-class KineticTransportEquationBase : public KineticEquationInterface<E, BasisfunctionImp>
+template <class E, class MomentBasisImp>
+class KineticTransportEquationBase : public KineticEquationInterface<E, MomentBasisImp>
 {
   using ThisType = KineticTransportEquationBase;
-  using BaseType = KineticEquationInterface<E, BasisfunctionImp>;
+  using BaseType = KineticEquationInterface<E, MomentBasisImp>;
 
 public:
   using BaseType::dimDomain;
   using BaseType::dimFlux;
   using BaseType::dimRange;
   using BaseType::dimRangeCols;
-  using typename BaseType::BasisfunctionType;
   using typename BaseType::DomainFieldType;
   using typename BaseType::DomainType;
   using typename BaseType::GenericFluxFunctionType;
   using typename BaseType::GenericFunctionType;
   using typename BaseType::MatrixType;
+  using typename BaseType::MomentBasis;
   using typename BaseType::RangeFieldType;
   using typename BaseType::RangeReturnType;
   using typename BaseType::StateType;
@@ -59,7 +59,7 @@ public:
   using BaseType::default_boundary_cfg;
   using BaseType::default_grid_cfg;
 
-  KineticTransportEquationBase(const BasisfunctionType& basis_functions,
+  KineticTransportEquationBase(const MomentBasis& basis_functions,
                                const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                                const XT::Common::Configuration& boundary_cfg = default_boundary_cfg(),
                                const RangeFieldType psi_vac = 5e-9)
@@ -74,7 +74,7 @@ public:
              VectorType& x,
              const VectorType& rhs,
              const MomentBasisInterface<DomainFieldType,
-                                        BasisfunctionType::dimDomain,
+                                        MomentBasis::dimDomain,
                                         RangeFieldType,
                                         dimRange,
                                         dimRangeCols,
diff --git a/dune/gdt/test/momentmodels/kinetictransport/checkerboard.hh b/dune/gdt/test/momentmodels/kinetictransport/checkerboard.hh
index a6843435b..14b1348a5 100644
--- a/dune/gdt/test/momentmodels/kinetictransport/checkerboard.hh
+++ b/dune/gdt/test/momentmodels/kinetictransport/checkerboard.hh
@@ -25,23 +25,23 @@ namespace GDT {
  * http://dx.doi.org/10.1016/j.jcp.2005.04.011
  * The 3D version is a straightforward generalization of the setup to 3 dimensions.
  * */
-template <class E, class BasisfunctionImp>
-class CheckerboardPn : public KineticTransportEquationBase<E, BasisfunctionImp>
+template <class E, class MomentBasisImp>
+class CheckerboardPn : public KineticTransportEquationBase<E, MomentBasisImp>
 {
-  using BaseType = KineticTransportEquationBase<E, BasisfunctionImp>;
+  using BaseType = KineticTransportEquationBase<E, MomentBasisImp>;
 
 public:
   using BaseType::dimDomain;
-  using typename BaseType::BasisfunctionType;
   using typename BaseType::BoundaryValueType;
   using typename BaseType::DomainType;
   using typename BaseType::GenericScalarFunctionType;
+  using typename BaseType::MomentBasis;
   using typename BaseType::RangeFieldType;
   using typename BaseType::ScalarFunctionType;
 
   using BaseType::default_boundary_cfg;
 
-  CheckerboardPn(const BasisfunctionType& basis_functions,
+  CheckerboardPn(const MomentBasis& basis_functions,
                  const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                  const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
     : BaseType(basis_functions, grid_cfg, boundary_cfg, 1e-8 / (4 * M_PI))
@@ -138,19 +138,19 @@ protected:
   }
 }; // class CheckerboardPn<...>
 
-template <class GV, class BasisfunctionType>
-class CheckerboardMn : public CheckerboardPn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>
+template <class GV, class MomentBasis>
+class CheckerboardMn : public CheckerboardPn<XT::Grid::extract_entity_t<GV>, MomentBasis>
 {
-  using BaseType = CheckerboardPn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>;
+  using BaseType = CheckerboardPn<XT::Grid::extract_entity_t<GV>, MomentBasis>;
 
 public:
   using typename BaseType::FluxType;
-  using ActualFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using ActualFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
 
   using BaseType::default_boundary_cfg;
   using BaseType::default_grid_cfg;
 
-  CheckerboardMn(const BasisfunctionType& basis_functions,
+  CheckerboardMn(const MomentBasis& basis_functions,
                  const GV& grid_view,
                  const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                  const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
diff --git a/dune/gdt/test/momentmodels/kinetictransport/planesource.hh b/dune/gdt/test/momentmodels/kinetictransport/planesource.hh
index bab1fa044..3e347ca43 100644
--- a/dune/gdt/test/momentmodels/kinetictransport/planesource.hh
+++ b/dune/gdt/test/momentmodels/kinetictransport/planesource.hh
@@ -17,25 +17,25 @@ namespace Dune {
 namespace GDT {
 
 
-template <class E, class BasisfunctionImp>
-class PlaneSourcePn : public KineticTransportEquationBase<E, BasisfunctionImp>
+template <class E, class MomentBasisImp>
+class PlaneSourcePn : public KineticTransportEquationBase<E, MomentBasisImp>
 {
-  using BaseType = KineticTransportEquationBase<E, BasisfunctionImp>;
+  using BaseType = KineticTransportEquationBase<E, MomentBasisImp>;
 
 public:
   using BaseType::default_boundary_cfg;
   using BaseType::dimDomain;
   using BaseType::dimRange;
-  using typename BaseType::BasisfunctionType;
   using typename BaseType::ConstantScalarFunctionType;
   using typename BaseType::DomainType;
   using typename BaseType::GenericFunctionType;
   using typename BaseType::InitialValueType;
+  using typename BaseType::MomentBasis;
   using typename BaseType::RangeFieldType;
   using typename BaseType::RangeReturnType;
   using typename BaseType::ScalarFunctionType;
 
-  PlaneSourcePn(const BasisfunctionType& basis_functions,
+  PlaneSourcePn(const MomentBasis& basis_functions,
                 const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                 const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
     : BaseType(basis_functions, grid_cfg, boundary_cfg)
@@ -109,21 +109,21 @@ protected:
 }; // class PlaneSourcePn<...>
 
 
-template <class GV, class BasisfunctionType>
-class PlaneSourceMn : public PlaneSourcePn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>
+template <class GV, class MomentBasis>
+class PlaneSourceMn : public PlaneSourcePn<XT::Grid::extract_entity_t<GV>, MomentBasis>
 {
-  using BaseType = PlaneSourcePn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>;
+  using BaseType = PlaneSourcePn<XT::Grid::extract_entity_t<GV>, MomentBasis>;
   using ThisType = PlaneSourceMn;
 
 public:
   using typename BaseType::FluxType;
   using typename BaseType::RangeReturnType;
-  using ActualFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using ActualFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
 
   using BaseType::default_boundary_cfg;
   using BaseType::default_grid_cfg;
 
-  PlaneSourceMn(const BasisfunctionType& basis_functions,
+  PlaneSourceMn(const MomentBasis& basis_functions,
                 const GV& grid_view,
                 const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                 const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
diff --git a/dune/gdt/test/momentmodels/kinetictransport/pointsource.hh b/dune/gdt/test/momentmodels/kinetictransport/pointsource.hh
index 290943f76..6afa3e80d 100644
--- a/dune/gdt/test/momentmodels/kinetictransport/pointsource.hh
+++ b/dune/gdt/test/momentmodels/kinetictransport/pointsource.hh
@@ -18,25 +18,25 @@ namespace Dune {
 namespace GDT {
 
 
-template <class E, class BasisfunctionImp>
-class PointSourcePn : public KineticTransportEquationBase<E, BasisfunctionImp>
+template <class E, class MomentBasisImp>
+class PointSourcePn : public KineticTransportEquationBase<E, MomentBasisImp>
 {
-  using BaseType = KineticTransportEquationBase<E, BasisfunctionImp>;
+  using BaseType = KineticTransportEquationBase<E, MomentBasisImp>;
 
 public:
   using BaseType::dimDomain;
-  using typename BaseType::BasisfunctionType;
   using typename BaseType::ConstantScalarFunctionType;
   using typename BaseType::DomainType;
   using typename BaseType::GenericFunctionType;
   using typename BaseType::InitialValueType;
+  using typename BaseType::MomentBasis;
   using typename BaseType::RangeFieldType;
   using typename BaseType::RangeReturnType;
   using typename BaseType::ScalarFunctionType;
 
   using BaseType::default_boundary_cfg;
 
-  PointSourcePn(const BasisfunctionType& basis_functions,
+  PointSourcePn(const MomentBasis& basis_functions,
                 const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                 const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
     : BaseType(basis_functions, grid_cfg, boundary_cfg, 1e-8 / (4 * M_PI))
@@ -99,20 +99,20 @@ protected:
   using BaseType::psi_vac_;
 }; // class PointSourcePn<...>
 
-template <class GV, class BasisfunctionType>
-class PointSourceMn : public PointSourcePn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>
+template <class GV, class MomentBasis>
+class PointSourceMn : public PointSourcePn<XT::Grid::extract_entity_t<GV>, MomentBasis>
 {
-  using BaseType = PointSourcePn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>;
+  using BaseType = PointSourcePn<XT::Grid::extract_entity_t<GV>, MomentBasis>;
   using ThisType = PointSourceMn;
 
 public:
   using typename BaseType::FluxType;
-  using ActualFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using ActualFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
 
   using BaseType::default_boundary_cfg;
   using BaseType::default_grid_cfg;
 
-  PointSourceMn(const BasisfunctionType& basis_functions,
+  PointSourceMn(const MomentBasis& basis_functions,
                 const GV& grid_view,
                 const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                 const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
diff --git a/dune/gdt/test/momentmodels/kinetictransport/shadow.hh b/dune/gdt/test/momentmodels/kinetictransport/shadow.hh
index 863b294de..11b0111f3 100644
--- a/dune/gdt/test/momentmodels/kinetictransport/shadow.hh
+++ b/dune/gdt/test/momentmodels/kinetictransport/shadow.hh
@@ -15,24 +15,24 @@ namespace Dune {
 namespace GDT {
 
 
-template <class E, class BasisfunctionImp>
-class ShadowPn : public KineticTransportEquationBase<E, BasisfunctionImp>
+template <class E, class MomentBasisImp>
+class ShadowPn : public KineticTransportEquationBase<E, MomentBasisImp>
 {
-  using BaseType = KineticTransportEquationBase<E, BasisfunctionImp>;
+  using BaseType = KineticTransportEquationBase<E, MomentBasisImp>;
 
 public:
-  using typename BaseType::BasisfunctionType;
   using typename BaseType::BoundaryValueType;
   using typename BaseType::ConstantScalarFunctionType;
   using typename BaseType::DomainType;
   using typename BaseType::GenericFunctionType;
   using typename BaseType::GenericScalarFunctionType;
+  using typename BaseType::MomentBasis;
   using typename BaseType::RangeFieldType;
   using typename BaseType::ScalarFunctionType;
 
   using BaseType::default_boundary_cfg;
 
-  ShadowPn(const BasisfunctionType& basis_functions,
+  ShadowPn(const MomentBasis& basis_functions,
            const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
            const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
     : BaseType(basis_functions, grid_cfg, boundary_cfg, 1e-8 / (4 * M_PI))
@@ -107,20 +107,20 @@ protected:
   using BaseType::psi_vac_;
 }; // class ShadowPn<...>
 
-template <class GV, class BasisfunctionType>
-class ShadowMn : public ShadowPn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>
+template <class GV, class MomentBasis>
+class ShadowMn : public ShadowPn<XT::Grid::extract_entity_t<GV>, MomentBasis>
 {
-  using BaseType = ShadowPn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>;
+  using BaseType = ShadowPn<XT::Grid::extract_entity_t<GV>, MomentBasis>;
   using ThisType = ShadowMn;
 
 public:
   using typename BaseType::FluxType;
-  using ActualFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using ActualFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
 
   using BaseType::default_boundary_cfg;
   using BaseType::default_grid_cfg;
 
-  ShadowMn(const BasisfunctionType& basis_functions,
+  ShadowMn(const MomentBasis& basis_functions,
            const GV& grid_view,
            const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
            const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
diff --git a/dune/gdt/test/momentmodels/kinetictransport/sourcebeam.hh b/dune/gdt/test/momentmodels/kinetictransport/sourcebeam.hh
index b034e3c6d..adbeb01c9 100644
--- a/dune/gdt/test/momentmodels/kinetictransport/sourcebeam.hh
+++ b/dune/gdt/test/momentmodels/kinetictransport/sourcebeam.hh
@@ -28,15 +28,14 @@ namespace Dune {
 namespace GDT {
 
 
-template <class E, class BasisfunctionImp>
-class SourceBeamPn : public KineticTransportEquationBase<E, BasisfunctionImp>
+template <class E, class MomentBasisImp>
+class SourceBeamPn : public KineticTransportEquationBase<E, MomentBasisImp>
 {
-  using BaseType = KineticTransportEquationBase<E, BasisfunctionImp>;
+  using BaseType = KineticTransportEquationBase<E, MomentBasisImp>;
 
 public:
   using BaseType::dimDomain;
   using BaseType::dimRange;
-  using typename BaseType::BasisfunctionType;
   using typename BaseType::BoundaryValueType;
   using typename BaseType::DomainFieldType;
   using typename BaseType::DomainType;
@@ -44,13 +43,14 @@ public:
   using typename BaseType::FluxType;
   using typename BaseType::GenericFunctionType;
   using typename BaseType::GenericScalarFunctionType;
+  using typename BaseType::MomentBasis;
   using typename BaseType::RangeFieldType;
   using typename BaseType::RangeReturnType;
   using typename BaseType::ScalarFunctionType;
 
   using BaseType::default_boundary_cfg;
 
-  SourceBeamPn(const BasisfunctionType& basis_functions,
+  SourceBeamPn(const MomentBasis& basis_functions,
                const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                const XT::Common::Configuration& boundary_cfg = default_boundary_cfg(),
                const bool is_mn_model = false)
@@ -82,7 +82,7 @@ public:
   {
     return std::make_unique<GenericFunctionType>(1, [&](const DomainType& x, const XT::Common::Parameter&) {
       if (x[0] < 1.5) {
-        static auto ret = helper<BasisfunctionType>::get_left_boundary_values(basis_functions_, psi_vac_, is_mn_model_);
+        static auto ret = helper<MomentBasis>::get_left_boundary_values(basis_functions_, psi_vac_, is_mn_model_);
         return ret;
       } else {
         auto ret = basis_functions_.integrated();
@@ -94,7 +94,7 @@ public:
 
   RangeReturnType left_boundary_value() const
   {
-    return helper<BasisfunctionType>::get_left_boundary_values(basis_functions_, psi_vac_, is_mn_model_);
+    return helper<MomentBasis>::get_left_boundary_values(basis_functions_, psi_vac_, is_mn_model_);
   }
 
   virtual RangeFieldType t_end() const override
@@ -166,7 +166,7 @@ protected:
     using helper_base::denominator;
     using helper_base::numerator;
 
-    static DynamicRangeType get_left_boundary_values(const BasisfunctionImp& basis_functions,
+    static DynamicRangeType get_left_boundary_values(const MomentBasisImp& basis_functions,
                                                      const RangeFieldType& psi_vac,
                                                      const bool is_mn_model)
     {
@@ -176,7 +176,7 @@ protected:
       // For the PN-Models, we do not have these issues and just use a very fine quadrature (which is not a performance
       // problem as the integration is only done once).
       const auto& quadratures =
-          is_mn_model ? basis_functions.quadratures() : BasisfunctionImp::gauss_lobatto_quadratures(100, 31);
+          is_mn_model ? basis_functions.quadratures() : MomentBasisImp::gauss_lobatto_quadratures(100, 31);
       for (size_t ii = 0; ii < quadratures.size(); ++ii) {
         const auto& quadrature = quadratures[ii];
         for (const auto& quad_point : quadrature) {
@@ -201,7 +201,7 @@ protected:
     using helper_base::integral_1;
     using helper_base::numerator;
 
-    static DynamicRangeType get_left_boundary_values(const BasisfunctionImp& basis_functions,
+    static DynamicRangeType get_left_boundary_values(const MomentBasisImp& basis_functions,
                                                      const RangeFieldType psi_vac,
                                                      const bool /*is_mn_model*/)
     {
@@ -233,7 +233,7 @@ protected:
     using helper_base::integral_1;
     using helper_base::integral_2;
 
-    static DynamicRangeType get_left_boundary_values(const BasisfunctionImp& basis_functions,
+    static DynamicRangeType get_left_boundary_values(const MomentBasisImp& basis_functions,
                                                      const RangeFieldType psi_vac,
                                                      const bool /*is_mn_model*/)
     {
@@ -254,21 +254,21 @@ protected:
   const bool is_mn_model_;
 }; // class SourceBeamPn<...>
 
-template <class GV, class BasisfunctionType>
-class SourceBeamMn : public SourceBeamPn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>
+template <class GV, class MomentBasis>
+class SourceBeamMn : public SourceBeamPn<XT::Grid::extract_entity_t<GV>, MomentBasis>
 {
-  using BaseType = SourceBeamPn<XT::Grid::extract_entity_t<GV>, BasisfunctionType>;
+  using BaseType = SourceBeamPn<XT::Grid::extract_entity_t<GV>, MomentBasis>;
   using ThisType = SourceBeamMn;
 
 public:
   using typename BaseType::FluxType;
   using typename BaseType::RangeReturnType;
-  using ActualFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using ActualFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
 
   using BaseType::default_boundary_cfg;
   using BaseType::default_grid_cfg;
 
-  SourceBeamMn(const BasisfunctionType& basis_functions,
+  SourceBeamMn(const MomentBasis& basis_functions,
                const GV& grid_view,
                const XT::Common::Configuration& grid_cfg = default_grid_cfg(),
                const XT::Common::Configuration& boundary_cfg = default_boundary_cfg())
diff --git a/dune/gdt/test/momentmodels/kinetictransport/testcases.hh b/dune/gdt/test/momentmodels/kinetictransport/testcases.hh
index d1fd1fa6f..7c7936d22 100644
--- a/dune/gdt/test/momentmodels/kinetictransport/testcases.hh
+++ b/dune/gdt/test/momentmodels/kinetictransport/testcases.hh
@@ -16,7 +16,7 @@
 #include <dune/gdt/momentmodels/basisfunctions.hh>
 #include <dune/gdt/spaces/l2/finite-volume.hh>
 #include <dune/gdt/spaces/l2/discontinuous-lagrange.hh>
-#include <dune/gdt/timestepper/interface.hh>
+#include <dune/gdt/tools/timestepper/interface.hh>
 #include <dune/gdt/operators/reconstruction/slopes.hh>
 
 #include "checkerboard.hh"
@@ -29,8 +29,8 @@ namespace Dune {
 namespace GDT {
 
 
-// choose RealizabilityLimiter suitable for BasisfunctionImp
-template <class GV, class BasisfunctionImp, class AnalyticalFluxType, class DiscreteFunctionType>
+// choose RealizabilityLimiter suitable for MomentBasisImp
+template <class GV, class MomentBasisImp, class AnalyticalFluxType, class DiscreteFunctionType>
 struct RealizabilityLimiterChooser;
 
 #if HAVE_CLP
@@ -40,16 +40,16 @@ struct RealizabilityLimiterChooser<GV,
                                    AnalyticalFluxType,
                                    DiscreteFunctionType>
 {
-  using BasisfunctionType = LegendreMomentBasis<double, double, order>;
-  using EntropyFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using MomentBasis = LegendreMomentBasis<double, double, order>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
   static constexpr size_t quad_order = 31;
   static constexpr size_t num_quad_refinements = 6;
 
   template <class EigenVectorWrapperType>
-  static std::unique_ptr<LpConvexhullRealizabilityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& basis_functions, const double epsilon)
+  static std::unique_ptr<LpConvexhullRealizabilityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>>
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& basis_functions, const double epsilon)
   {
-    using SlopeType = LpConvexhullRealizabilityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>;
+    using SlopeType = LpConvexhullRealizabilityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, basis_functions, epsilon);
   }
 };
@@ -64,23 +64,23 @@ struct RealizabilityLimiterChooser<GV,
                                    AnalyticalFluxType,
                                    DiscreteFunctionType>
 {
-  using BasisfunctionType = HatFunctionMomentBasis<double, 1, double, dimRange, 1, 1>;
-  using EntropyFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using MomentBasis = HatFunctionMomentBasis<double, 1, double, dimRange, 1, 1>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
   static constexpr size_t quad_order = 15;
   static constexpr size_t num_quad_refinements = 0;
 
 #if HAVE_CLP && USE_LP_POSITIVITY_LIMITER
   template <class EigenVectorWrapperType>
-  static std::unique_ptr<LpPositivityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& /*basis_functions*/, const double epsilon)
+  static std::unique_ptr<LpPositivityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>>
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& /*basis_functions*/, const double epsilon)
   {
-    using SlopeType = LpPositivityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>;
+    using SlopeType = LpPositivityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, epsilon);
   }
 #else // HAVE_CLP
   template <class EigenVectorWrapperType>
   static std::unique_ptr<PositivityLimitedSlope<GV, double, dimRange, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& /*basis_functions*/, const double epsilon)
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& /*basis_functions*/, const double epsilon)
   {
     using SlopeType = PositivityLimitedSlope<GV, double, dimRange, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, epsilon);
@@ -94,14 +94,14 @@ struct RealizabilityLimiterChooser<GV,
                                    AnalyticalFluxType,
                                    DiscreteFunctionType>
 {
-  using BasisfunctionType = PartialMomentBasis<double, 1, double, dimRange, 1, 1>;
-  using EntropyFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using MomentBasis = PartialMomentBasis<double, 1, double, dimRange, 1, 1>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
   static constexpr size_t quad_order = 15;
   static constexpr size_t num_quad_refinements = 0;
 
   template <class EigenVectorWrapperType>
   static std::unique_ptr<Dg1dRealizabilityLimitedSlope<GV, double, dimRange, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& basis_functions, const double epsilon)
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& basis_functions, const double epsilon)
   {
     using SlopeType = Dg1dRealizabilityLimitedSlope<GV, double, dimRange, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, basis_functions, epsilon);
@@ -115,16 +115,16 @@ struct RealizabilityLimiterChooser<GV,
                                    AnalyticalFluxType,
                                    DiscreteFunctionType>
 {
-  using BasisfunctionType = RealSphericalHarmonicsMomentBasis<double, double, order, 3>;
-  using EntropyFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using MomentBasis = RealSphericalHarmonicsMomentBasis<double, double, order, 3>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
   static constexpr size_t quad_order = 2 * order + 6;
   static constexpr size_t num_quad_refinements = 0;
 
   template <class EigenVectorWrapperType>
-  static std::unique_ptr<LpConvexhullRealizabilityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& basis_functions, const double epsilon)
+  static std::unique_ptr<LpConvexhullRealizabilityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>>
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& basis_functions, const double epsilon)
   {
-    using SlopeType = LpConvexhullRealizabilityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>;
+    using SlopeType = LpConvexhullRealizabilityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, basis_functions, epsilon);
   }
 };
@@ -136,26 +136,26 @@ struct RealizabilityLimiterChooser<GV,
                                    AnalyticalFluxType,
                                    DiscreteFunctionType>
 {
-  using BasisfunctionType = HatFunctionMomentBasis<double, 3, double, refinements, 1, 3>;
-  using EntropyFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
-  static constexpr size_t dimRange = BasisfunctionType::dimRange;
+  using MomentBasis = HatFunctionMomentBasis<double, 3, double, refinements, 1, 3>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
+  static constexpr size_t dimRange = MomentBasis::dimRange;
   static constexpr size_t quad_order = 7; // fekete rule number 7
   static constexpr size_t num_quad_refinements = 0;
 
 #if HAVE_CLP && USE_LP_POSITIVITY_LIMITER
   template <class EigenVectorWrapperType>
-  static std::unique_ptr<LpPositivityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& /*basis_functions*/, const double epsilon)
+  static std::unique_ptr<LpPositivityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>>
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& /*basis_functions*/, const double epsilon)
   {
-    using SlopeType = LpPositivityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>;
+    using SlopeType = LpPositivityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, epsilon);
   }
 #else // HAVE_CLP
   template <class EigenVectorWrapperType>
   static std::unique_ptr<PositivityLimitedSlope<GV, double, dimRange, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& /*basis_functions*/, const double epsilon)
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& /*basis_functions*/, const double epsilon)
   {
-    using SlopeType = PositivityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>;
+    using SlopeType = PositivityLimitedSlope<GV, double, dimRange, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, epsilon);
   }
 #endif // HAVE_CLP
@@ -168,23 +168,23 @@ struct RealizabilityLimiterChooser<GV,
                                    AnalyticalFluxType,
                                    DiscreteFunctionType>
 {
-  using BasisfunctionType = PartialMomentBasis<double, 3, double, refinements, 1, 3>;
-  using EntropyFluxType = EntropyBasedFluxFunction<GV, BasisfunctionType>;
+  using MomentBasis = PartialMomentBasis<double, 3, double, refinements, 1, 3>;
+  using EntropyFluxType = EntropyBasedFluxFunction<GV, MomentBasis>;
   static constexpr size_t quad_order = 3; // fekete rule number 3
   static constexpr size_t num_quad_refinements = 0;
 
   template <class EigenVectorWrapperType>
-  static std::unique_ptr<DgConvexHullRealizabilityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>>
-  make_slope(const EntropyFluxType& entropy_flux, const BasisfunctionType& basis_functions, const double epsilon)
+  static std::unique_ptr<DgConvexHullRealizabilityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>>
+  make_slope(const EntropyFluxType& entropy_flux, const MomentBasis& basis_functions, const double epsilon)
   {
-    using SlopeType = DgConvexHullRealizabilityLimitedSlope<GV, BasisfunctionType, EigenVectorWrapperType>;
+    using SlopeType = DgConvexHullRealizabilityLimitedSlope<GV, MomentBasis, EigenVectorWrapperType>;
     return std::make_unique<SlopeType>(entropy_flux, basis_functions, epsilon);
   }
 };
 #endif // HAVE_QHULL
 
 // SourceBeam Pn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct SourceBeamPnExpectedResults;
 
 template <bool reconstruct>
@@ -214,14 +214,14 @@ struct SourceBeamPnExpectedResults<PartialMomentBasis<double, 1, double, 8, 1, 1
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
 struct SourceBeamPnTestCase
 {
-  using BasisfunctionType = BasisfunctionImp;
-  static constexpr size_t dimDomain = BasisfunctionType::dimDomain;
-  static constexpr size_t dimRange = BasisfunctionType::dimRange;
-  using DomainFieldType = typename BasisfunctionType::DomainFieldType;
-  using RangeFieldType = typename BasisfunctionType::RangeFieldType;
+  using MomentBasis = MomentBasisImp;
+  static constexpr size_t dimDomain = MomentBasis::dimDomain;
+  static constexpr size_t dimRange = MomentBasis::dimRange;
+  using DomainFieldType = typename MomentBasis::DomainFieldType;
+  using RangeFieldType = typename MomentBasis::RangeFieldType;
   using GridType = GridImp;
   using GridViewType = typename GridType::LeafGridView;
   using E = XT::Grid::extract_entity_t<GridViewType>;
@@ -230,15 +230,15 @@ struct SourceBeamPnTestCase
       std::conditional_t<reconstruct, DiscontinuousLagrangeSpace<GridViewType, dimRange, RangeFieldType>, SpaceType>;
   using VectorType = typename Dune::XT::LA::Container<RangeFieldType, Dune::XT::LA::default_backend>::VectorType;
   using DiscreteFunctionType = DiscreteFunction<VectorType, GridViewType, dimRange, 1, RangeFieldType>;
-  using ProblemType = SourceBeamPn<E, BasisfunctionType>;
+  using ProblemType = SourceBeamPn<E, MomentBasis>;
   static constexpr RangeFieldType t_end = 0.25;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = SourceBeamPnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = SourceBeamPnExpectedResults<MomentBasisImp, reconstruction>;
 };
 
 
 // SourceBeam Mn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct SourceBeamMnExpectedResults;
 
 template <bool reconstruct>
@@ -268,20 +268,20 @@ struct SourceBeamMnExpectedResults<PartialMomentBasis<double, 1, double, 8, 1, 1
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct SourceBeamMnTestCase : public SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct SourceBeamMnTestCase : public SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using typename BaseType::DiscreteFunctionType;
   using typename BaseType::GridViewType;
-  using ProblemType = SourceBeamMn<GridViewType, BasisfunctionImp>;
-  using ExpectedResultsType = SourceBeamMnExpectedResults<BasisfunctionImp, reconstruct>;
+  using ProblemType = SourceBeamMn<GridViewType, MomentBasisImp>;
+  using ExpectedResultsType = SourceBeamMnExpectedResults<MomentBasisImp, reconstruct>;
   using RealizabilityLimiterChooserType =
-      RealizabilityLimiterChooser<GridViewType, BasisfunctionImp, typename ProblemType::FluxType, DiscreteFunctionType>;
+      RealizabilityLimiterChooser<GridViewType, MomentBasisImp, typename ProblemType::FluxType, DiscreteFunctionType>;
 };
 
 // PlaneSource Pn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct PlaneSourcePnExpectedResults;
 
 template <bool reconstruct>
@@ -311,21 +311,21 @@ struct PlaneSourcePnExpectedResults<PartialMomentBasis<double, 1, double, 8, 1,
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct PlaneSourcePnTestCase : SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct PlaneSourcePnTestCase : SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using RangeFieldType = typename BaseType::RangeFieldType;
   using typename BaseType::E;
-  using ProblemType = PlaneSourcePn<E, BasisfunctionImp>;
+  using ProblemType = PlaneSourcePn<E, MomentBasisImp>;
   static constexpr RangeFieldType t_end = 0.25;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = PlaneSourcePnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = PlaneSourcePnExpectedResults<MomentBasisImp, reconstruction>;
 };
 
 
 // PlaneSource Mn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct PlaneSourceMnExpectedResults;
 
 template <bool reconstruct>
@@ -355,24 +355,24 @@ struct PlaneSourceMnExpectedResults<PartialMomentBasis<double, 1, double, 8, 1,
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct PlaneSourceMnTestCase : SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct PlaneSourceMnTestCase : SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using typename BaseType::DiscreteFunctionType;
   using RangeFieldType = typename BaseType::RangeFieldType;
   using typename BaseType::GridViewType;
-  using ProblemType = PlaneSourceMn<GridViewType, BasisfunctionImp>;
+  using ProblemType = PlaneSourceMn<GridViewType, MomentBasisImp>;
   static constexpr RangeFieldType t_end = 0.25;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = PlaneSourceMnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = PlaneSourceMnExpectedResults<MomentBasisImp, reconstruction>;
   using RealizabilityLimiterChooserType =
-      RealizabilityLimiterChooser<GridViewType, BasisfunctionImp, typename ProblemType::FluxType, DiscreteFunctionType>;
+      RealizabilityLimiterChooser<GridViewType, MomentBasisImp, typename ProblemType::FluxType, DiscreteFunctionType>;
 };
 
 
 // PointSourcePn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct PointSourcePnExpectedResults
 {
   static constexpr double l1norm = 0.;
@@ -437,20 +437,20 @@ struct PointSourcePnExpectedResults<PartialMomentBasis<double, 3, double, 1, 1,
 };
 
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct PointSourcePnTestCase : SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct PointSourcePnTestCase : SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using RangeFieldType = typename BaseType::RangeFieldType;
   using typename BaseType::E;
-  using ProblemType = PointSourcePn<E, BasisfunctionImp>;
+  using ProblemType = PointSourcePn<E, MomentBasisImp>;
   static constexpr RangeFieldType t_end = 0.1;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = PointSourcePnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = PointSourcePnExpectedResults<MomentBasisImp, reconstruction>;
 };
 
 // CheckerboardPn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct CheckerboardPnExpectedResults
 {
   static constexpr double l1norm = 0.;
@@ -468,20 +468,20 @@ struct CheckerboardPnExpectedResults<RealSphericalHarmonicsMomentBasis<double, d
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct CheckerboardPnTestCase : SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct CheckerboardPnTestCase : SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using RangeFieldType = typename BaseType::RangeFieldType;
   using typename BaseType::E;
-  using ProblemType = CheckerboardPn<E, BasisfunctionImp>;
+  using ProblemType = CheckerboardPn<E, MomentBasisImp>;
   static constexpr RangeFieldType t_end = 0.1;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = CheckerboardPnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = CheckerboardPnExpectedResults<MomentBasisImp, reconstruction>;
 };
 
 // ShadowPn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct ShadowPnExpectedResults
 {
   static constexpr double l1norm = 0.;
@@ -499,21 +499,21 @@ struct ShadowPnExpectedResults<RealSphericalHarmonicsMomentBasis<double, double,
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct ShadowPnTestCase : SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct ShadowPnTestCase : SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamPnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamPnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using RangeFieldType = typename BaseType::RangeFieldType;
   using typename BaseType::E;
-  using ProblemType = ShadowPn<E, BasisfunctionImp>;
+  using ProblemType = ShadowPn<E, MomentBasisImp>;
   static constexpr RangeFieldType t_end = 0.1;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = ShadowPnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = ShadowPnExpectedResults<MomentBasisImp, reconstruction>;
 };
 
 
 // PointSourceMn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct PointSourceMnExpectedResults;
 
 template <bool reconstruct>
@@ -550,25 +550,25 @@ struct PointSourceMnExpectedResults<PartialMomentBasis<double, 3, double, 0, 1,
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct PointSourceMnTestCase : SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct PointSourceMnTestCase : SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using typename BaseType::GridViewType;
-  using ProblemType = PointSourceMn<GridViewType, BasisfunctionImp>;
+  using ProblemType = PointSourceMn<GridViewType, MomentBasisImp>;
   using typename BaseType::RangeFieldType;
   static constexpr RangeFieldType t_end = 0.1;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = PointSourceMnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = PointSourceMnExpectedResults<MomentBasisImp, reconstruction>;
   using RealizabilityLimiterChooserType = RealizabilityLimiterChooser<GridViewType,
-                                                                      BasisfunctionImp,
+                                                                      MomentBasisImp,
                                                                       typename ProblemType::FluxType,
                                                                       typename BaseType::DiscreteFunctionType>;
 };
 
 
 // CheckerboardMn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct CheckerboardMnExpectedResults;
 
 template <bool reconstruct>
@@ -580,25 +580,25 @@ struct CheckerboardMnExpectedResults<RealSphericalHarmonicsMomentBasis<double, d
   static constexpr double tol = 1e-9;
 };
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct CheckerboardMnTestCase : SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct CheckerboardMnTestCase : SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using typename BaseType::GridViewType;
-  using ProblemType = CheckerboardMn<GridViewType, BasisfunctionImp>;
+  using ProblemType = CheckerboardMn<GridViewType, MomentBasisImp>;
   using typename BaseType::RangeFieldType;
   static constexpr RangeFieldType t_end = 0.1;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = CheckerboardMnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = CheckerboardMnExpectedResults<MomentBasisImp, reconstruction>;
   using RealizabilityLimiterChooserType = RealizabilityLimiterChooser<GridViewType,
-                                                                      BasisfunctionImp,
+                                                                      MomentBasisImp,
                                                                       typename ProblemType::FluxType,
                                                                       typename BaseType::DiscreteFunctionType>;
 };
 
 
 // ShadowMn
-template <class BasisfunctionImp, bool reconstruct>
+template <class MomentBasisImp, bool reconstruct>
 struct ShadowMnExpectedResults;
 
 template <bool reconstruct>
@@ -611,18 +611,18 @@ struct ShadowMnExpectedResults<RealSphericalHarmonicsMomentBasis<double, double,
 };
 
 
-template <class GridImp, class BasisfunctionImp, bool reconstruct>
-struct ShadowMnTestCase : SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>
+template <class GridImp, class MomentBasisImp, bool reconstruct>
+struct ShadowMnTestCase : SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>
 {
-  using BaseType = SourceBeamMnTestCase<GridImp, BasisfunctionImp, reconstruct>;
+  using BaseType = SourceBeamMnTestCase<GridImp, MomentBasisImp, reconstruct>;
   using typename BaseType::GridViewType;
-  using ProblemType = ShadowMn<GridViewType, BasisfunctionImp>;
+  using ProblemType = ShadowMn<GridViewType, MomentBasisImp>;
   using typename BaseType::RangeFieldType;
   static constexpr RangeFieldType t_end = 0.1;
   static constexpr bool reconstruction = reconstruct;
-  using ExpectedResultsType = ShadowMnExpectedResults<BasisfunctionImp, reconstruction>;
+  using ExpectedResultsType = ShadowMnExpectedResults<MomentBasisImp, reconstruction>;
   using RealizabilityLimiterChooserType = RealizabilityLimiterChooser<GridViewType,
-                                                                      BasisfunctionImp,
+                                                                      MomentBasisImp,
                                                                       typename ProblemType::FluxType,
                                                                       typename BaseType::DiscreteFunctionType>;
 };
diff --git a/dune/gdt/test/pn-discretization.hh b/dune/gdt/test/pn-discretization.hh
index aec39b437..1cb17d742 100644
--- a/dune/gdt/test/pn-discretization.hh
+++ b/dune/gdt/test/pn-discretization.hh
@@ -27,9 +27,9 @@
 #include <dune/gdt/local/numerical-fluxes/kinetic.hh>
 #include <dune/gdt/local/numerical-fluxes/lax-friedrichs.hh>
 #include <dune/gdt/local/operators/advection-fv.hh>
-#include <dune/gdt/timestepper/fractional-step.hh>
-#include <dune/gdt/timestepper/explicit-rungekutta.hh>
-#include <dune/gdt/timestepper/matrix-exponential-kinetic-isotropic.hh>
+#include <dune/gdt/tools/timestepper/fractional-step.hh>
+#include <dune/gdt/tools/timestepper/explicit-rungekutta.hh>
+#include <dune/gdt/tools/timestepper/matrix-exponential-kinetic-isotropic.hh>
 
 #include <dune/gdt/test/momentmodels/kineticequation.hh>
 
@@ -133,7 +133,7 @@ int parse_momentmodel_arguments(int argc,
 }
 
 
-template <class BasisfunctionType, class AnalyticalFluxType>
+template <class MomentBasis, class AnalyticalFluxType>
 struct EigenvectorWrapperChooser
 {
   using type = Dune::GDT::internal::EigenvectorWrapper<AnalyticalFluxType>;
@@ -192,7 +192,7 @@ struct HyperbolicPnDiscretization
     const size_t num_threads = 4;
     XT::Common::threadManager().set_max_threads(num_threads);
     //******************* get typedefs and constants from ProblemType **********************//
-    using BasisfunctionType = typename TestCaseType::BasisfunctionType;
+    using MomentBasis = typename TestCaseType::MomentBasis;
     using DiscreteFunctionType = typename TestCaseType::DiscreteFunctionType;
     using GridType = typename TestCaseType::GridType;
     using SpaceType = typename TestCaseType::SpaceType;
@@ -203,8 +203,8 @@ struct HyperbolicPnDiscretization
     using ProblemType = typename TestCaseType::ProblemType;
     using RangeFieldType = typename ProblemType::RangeFieldType;
     using BoundaryValueType = typename ProblemType::BoundaryValueType;
-    static constexpr size_t dimDomain = BasisfunctionType::dimDomain;
-    static constexpr size_t dimRange = BasisfunctionType::dimRange;
+    static constexpr size_t dimDomain = MomentBasis::dimDomain;
+    static constexpr size_t dimRange = MomentBasis::dimRange;
     using MatrixType = typename XT::LA::Container<RangeFieldType>::MatrixType;
     using VectorType = typename XT::LA::Container<RangeFieldType>::VectorType;
 
@@ -224,10 +224,10 @@ struct HyperbolicPnDiscretization
     if ((num_quad_refinements == size_t(-1) || quad_order == size_t(-1)) && (num_quad_refinements != quad_order))
       std::cerr << "You specified either num_quad_refinements or quad_order, please also specify the other one!"
                 << std::endl;
-    std::shared_ptr<const BasisfunctionType> basis_functions =
+    std::shared_ptr<const MomentBasis> basis_functions =
         (num_quad_refinements == size_t(-1) || quad_order == size_t(-1))
-            ? std::make_shared<const BasisfunctionType>()
-            : std::make_shared<const BasisfunctionType>(quad_order, num_quad_refinements);
+            ? std::make_shared<const MomentBasis>()
+            : std::make_shared<const MomentBasis>(quad_order, num_quad_refinements);
     const std::unique_ptr<ProblemType> problem_ptr =
         XT::Common::make_unique<ProblemType>(*basis_functions, grid_config);
     const auto& problem = *problem_ptr;
@@ -252,7 +252,7 @@ struct HyperbolicPnDiscretization
 
     // ******************** choose flux and rhs operator and timestepper ******************************************
     using AdvectionOperatorType = AdvectionFvOperator<MatrixType, GV, dimRange>;
-    using EigenvectorWrapperType = typename EigenvectorWrapperChooser<BasisfunctionType, AnalyticalFluxType>::type;
+    using EigenvectorWrapperType = typename EigenvectorWrapperChooser<MomentBasis, AnalyticalFluxType>::type;
     using ReconstructionOperatorType =
         LinearReconstructionOperator<AnalyticalFluxType, BoundaryValueType, GV, MatrixType, EigenvectorWrapperType>;
     using ReconstructionFvOperatorType =
@@ -263,7 +263,7 @@ struct HyperbolicPnDiscretization
         ExplicitRungeKuttaTimeStepper<FvOperatorType,
                                       DiscreteFunctionType,
                                       TimeStepperMethods::explicit_rungekutta_second_order_ssp>;
-    using RhsTimeStepperType = KineticIsotropicTimeStepper<DiscreteFunctionType, BasisfunctionType>;
+    using RhsTimeStepperType = KineticIsotropicTimeStepper<DiscreteFunctionType, MomentBasis>;
     using TimeStepperType = StrangSplittingTimeStepper<RhsTimeStepperType, OperatorTimeStepperType>;
 
     // *************** choose t_end and initial dt **************************************
@@ -277,7 +277,7 @@ struct HyperbolicPnDiscretization
     RangeFieldType dt = CFL * dx;
 
     // *********************** create operators and timesteppers ************************************
-    NumericalKineticFlux<I, BasisfunctionType> numerical_flux(*analytical_flux, *basis_functions);
+    NumericalKineticFlux<I, MomentBasis> numerical_flux(*analytical_flux, *basis_functions);
     //    NumericalLaxFriedrichsFlux<I, dimDomain, dimRange, RangeFieldType> numerical_flux(*analytical_flux, 1.);
     AdvectionOperatorType advection_operator(grid_view, numerical_flux, advection_source_space, fv_space);
     // boundary treatment
diff --git a/dune/gdt/timestepper/enums.hh b/dune/gdt/tools/timestepper/enums.hh
similarity index 100%
rename from dune/gdt/timestepper/enums.hh
rename to dune/gdt/tools/timestepper/enums.hh
diff --git a/dune/gdt/timestepper/explicit-rungekutta.hh b/dune/gdt/tools/timestepper/explicit-rungekutta.hh
similarity index 100%
rename from dune/gdt/timestepper/explicit-rungekutta.hh
rename to dune/gdt/tools/timestepper/explicit-rungekutta.hh
diff --git a/dune/gdt/timestepper/fractional-step.hh b/dune/gdt/tools/timestepper/fractional-step.hh
similarity index 100%
rename from dune/gdt/timestepper/fractional-step.hh
rename to dune/gdt/tools/timestepper/fractional-step.hh
diff --git a/dune/gdt/timestepper/interface.hh b/dune/gdt/tools/timestepper/interface.hh
similarity index 100%
rename from dune/gdt/timestepper/interface.hh
rename to dune/gdt/tools/timestepper/interface.hh
diff --git a/dune/gdt/timestepper/matrix-exponential-kinetic-isotropic.hh b/dune/gdt/tools/timestepper/matrix-exponential-kinetic-isotropic.hh
similarity index 90%
rename from dune/gdt/timestepper/matrix-exponential-kinetic-isotropic.hh
rename to dune/gdt/tools/timestepper/matrix-exponential-kinetic-isotropic.hh
index f2803e213..e4adbccf4 100644
--- a/dune/gdt/timestepper/matrix-exponential-kinetic-isotropic.hh
+++ b/dune/gdt/tools/timestepper/matrix-exponential-kinetic-isotropic.hh
@@ -22,7 +22,7 @@ namespace Dune {
 namespace GDT {
 
 
-template <class DiscreteFunctionType, class BasisfunctionType>
+template <class DiscreteFunctionType, class MomentBasis>
 class KineticIsotropicLocalFunctor
   : public XT::Grid::ElementFunctor<typename DiscreteFunctionType::SpaceType::GridViewType>
 {
@@ -30,13 +30,13 @@ class KineticIsotropicLocalFunctor
   using BaseType = typename XT::Grid::ElementFunctor<GridViewType>;
   using RangeType = typename DiscreteFunctionType::LocalFunctionType::RangeReturnType;
   using ScalarFunctionType =
-      XT::Functions::FunctionInterface<BasisfunctionType::dimDomain, 1, 1, typename BasisfunctionType::RangeFieldType>;
+      XT::Functions::FunctionInterface<MomentBasis::dimDomain, 1, 1, typename MomentBasis::RangeFieldType>;
   static constexpr size_t dimRange = DiscreteFunctionType::r;
 
 public:
   using typename BaseType::E;
 
-  KineticIsotropicLocalFunctor(const BasisfunctionType& basis_functions,
+  KineticIsotropicLocalFunctor(const MomentBasis& basis_functions,
                                DiscreteFunctionType& solution,
                                const double dt,
                                const ScalarFunctionType& sigma_a,
@@ -86,7 +86,7 @@ public:
   }
 
 private:
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
   DiscreteFunctionType& solution_;
   const double dt_;
   const ScalarFunctionType& sigma_a_;
@@ -99,7 +99,7 @@ private:
 
 /** \brief Time stepper solving linear equation d_t u = Au + b by matrix exponential
  */
-template <class DiscreteFunctionImp, class BasisfunctionType>
+template <class DiscreteFunctionImp, class MomentBasis>
 class KineticIsotropicTimeStepper : public TimeStepperInterface<DiscreteFunctionImp>
 {
   typedef KineticIsotropicTimeStepper ThisType;
@@ -117,7 +117,7 @@ public:
   using BaseType::current_solution;
   using BaseType::current_time;
 
-  KineticIsotropicTimeStepper(const BasisfunctionType& basis_functions,
+  KineticIsotropicTimeStepper(const MomentBasis& basis_functions,
                               DiscreteFunctionType& initial_values,
                               const ScalarFunctionType& sigma_a,
                               const ScalarFunctionType& sigma_s,
@@ -135,7 +135,7 @@ public:
     const RangeFieldType actual_dt = std::min(dt, max_dt);
     auto& t = current_time();
     auto& u_n = current_solution();
-    KineticIsotropicLocalFunctor<DiscreteFunctionType, BasisfunctionType> functor(
+    KineticIsotropicLocalFunctor<DiscreteFunctionType, MomentBasis> functor(
         basis_functions_, u_n, actual_dt, sigma_a_, sigma_s_, Q_);
     auto walker = XT::Grid::Walker<typename DiscreteFunctionType::SpaceType::GridViewType>(u_n.space().grid_view());
     walker.append(functor);
@@ -145,7 +145,7 @@ public:
   } // ... step(...)
 
 private:
-  const BasisfunctionType& basis_functions_;
+  const MomentBasis& basis_functions_;
   const ScalarFunctionType& sigma_a_;
   const ScalarFunctionType& sigma_s_;
   const ScalarFunctionType& Q_;
-- 
GitLab