From 723a10040cb48f2c44f738eb92d3944ce02de73f Mon Sep 17 00:00:00 2001
From: Tobias Leibner <tobias.leibner@uni-muenster.de>
Date: Fri, 6 Apr 2018 15:57:12 +0200
Subject: [PATCH] [container.common.matrix.dense] do not depend on
 DynamicMatrix as backend

DynamicMatrix is essentially a vector of vectors, so the memory layout is
not contiguous. We would like to be able to use algorithms from lapack and
blas, so I changed the backend to be a single std::vector plus index arithmetic
---
 dune/xt/la/algorithms/cholesky.hh           |   6 +-
 dune/xt/la/algorithms/qr.hh                 | 161 ++++++++++----
 dune/xt/la/algorithms/triangular_solves.hh  |  77 +++++--
 dune/xt/la/container/common/matrix/dense.hh | 231 +++++++++-----------
 dune/xt/la/solver/common.hh                 |   5 +-
 5 files changed, 292 insertions(+), 188 deletions(-)

diff --git a/dune/xt/la/algorithms/cholesky.hh b/dune/xt/la/algorithms/cholesky.hh
index 1745807c3..09b284058 100644
--- a/dune/xt/la/algorithms/cholesky.hh
+++ b/dune/xt/la/algorithms/cholesky.hh
@@ -53,8 +53,8 @@ solve_tridiag_ldlt(const FirstVectorType& diag, const SecondVectorType& subdiag,
   typedef Common::VectorAbstraction<VectorType> V;
   typedef typename V::ScalarType ScalarType;
   size_t size = vec.size();
-  auto L = eye_matrix<CommonSparseMatrix<ScalarType>>(
-      size, Common::diagonal_pattern(size, size) + Common::diagonal_pattern(size, size, -1));
+  auto L =
+      eye_matrix<CommonSparseMatrix<ScalarType>>(size, diagonal_pattern(size, size) + diagonal_pattern(size, size, -1));
   for (size_t ii = 0; ii < size - 1; ++ii)
     L.set_entry(ii + 1, ii, V2::get_entry(subdiag, ii));
   // solve LDL^T x = rhs;
@@ -270,7 +270,7 @@ struct LDLTSolver
       ;
 #if HAVE_MKL || HAVE_LAPACKE
     } else if (is_contiguous) {
-      assert(std::max(M::cols(rhs), size) <= std::numeric_limits<int>::max());
+      assert(V::is_vector || std::max(M::cols(rhs), size) <= std::numeric_limits<int>::max());
       int rhs_cols = V::is_vector ? 1 : int(M::cols(rhs));
       int info = Common::Lapacke::dpttrs(is_row_major ? Common::Lapacke::row_major() : Common::Lapacke::col_major(),
                                          static_cast<int>(size),
diff --git a/dune/xt/la/algorithms/qr.hh b/dune/xt/la/algorithms/qr.hh
index b93e60992..b49c5588b 100644
--- a/dune/xt/la/algorithms/qr.hh
+++ b/dune/xt/la/algorithms/qr.hh
@@ -11,6 +11,8 @@
 #ifndef DUNE_XT_LA_ALGORITHMS_QR_HH
 #define DUNE_XT_LA_ALGORITHMS_QR_HH
 
+#include <complex>
+
 #include <dune/xt/common/lapacke.hh>
 #include <dune/xt/common/matrix.hh>
 #include <dune/xt/common/vector.hh>
@@ -63,10 +65,11 @@ void multiply_householder_from_left(MatrixType& A,
                                     const size_t col_begin,
                                     const size_t col_end)
 {
-  typedef Common::MatrixAbstraction<MatrixType> M;
-  typedef Common::VectorAbstraction<VectorType> V;
+  using M = Common::MatrixAbstraction<MatrixType>;
+  using V = Common::VectorAbstraction<VectorType>;
+  using ScalarType = typename M::ScalarType;
   // calculate w^T A first
-  VectorType wT_A = V::create(M::cols(A), 0.);
+  VectorType wT_A = V::create(M::cols(A), ScalarType(0.));
   for (size_t rr = row_begin; rr < row_end; ++rr)
     for (size_t cc = col_begin; cc < col_end; ++cc)
       V::add_to_entry(wT_A, cc, V::get_entry(v, rr) * M::get_entry(A, rr, cc));
@@ -113,11 +116,12 @@ void multiply_householder_from_right(MatrixType& A,
 template <class MatrixType, class VectorType, class IndexVectorType>
 void qr_decomposition(MatrixType& A, VectorType& tau, IndexVectorType& permutations)
 {
-  typedef Common::MatrixAbstraction<MatrixType> M;
-  typedef Common::VectorAbstraction<VectorType> V;
-  typedef Common::VectorAbstraction<IndexVectorType> VI;
-  typedef typename VI::ScalarType IndexType;
-  typedef typename M::RealType RealType;
+  using M = typename Common::MatrixAbstraction<MatrixType>;
+  using V = typename Common::VectorAbstraction<VectorType>;
+  using VI = typename Common::VectorAbstraction<IndexVectorType>;
+  using IndexType = typename VI::ScalarType;
+  using RealType = typename M::RealType;
+  using ScalarType = typename M::ScalarType;
 
   const size_t num_rows = M::rows(A);
   const size_t num_cols = M::cols(A);
@@ -131,9 +135,9 @@ void qr_decomposition(MatrixType& A, VectorType& tau, IndexVectorType& permutati
   std::vector<RealType> col_norms(num_cols);
   for (size_t rr = 0; rr < num_rows; ++rr)
     for (size_t cc = 0; cc < num_cols; ++cc)
-      col_norms[cc] += std::pow(M::get_entry(A, rr, cc), 2);
+      col_norms[cc] += std::pow(std::abs(M::get_entry(A, rr, cc)), 2);
 
-  VectorType w = V::create(num_rows, 0.);
+  VectorType w = V::create(num_rows, ScalarType(0.));
 
   for (size_t jj = 0; jj < num_cols - 1; ++jj) {
 
@@ -142,14 +146,14 @@ void qr_decomposition(MatrixType& A, VectorType& tau, IndexVectorType& permutati
     auto max_it = std::max_element(col_norms.begin() + jj, col_norms.end());
     size_t max_index = std::distance(col_norms.begin(), max_it);
     if (max_index != jj) {
-      auto tmp = col_norms[jj];
+      auto tmp_real = col_norms[jj];
       col_norms[jj] = col_norms[max_index];
-      col_norms[max_index] = tmp;
+      col_norms[max_index] = tmp_real;
       auto tmp_index = VI::get_entry(permutations, jj);
       VI::set_entry(permutations, jj, VI::get_entry(permutations, max_index));
       VI::set_entry(permutations, max_index, tmp_index);
       for (size_t rr = 0; rr < num_rows; ++rr) {
-        tmp = M::get_entry(A, rr, max_index);
+        auto tmp = M::get_entry(A, rr, max_index);
         M::set_entry(A, rr, max_index, M::get_entry(A, rr, jj));
         M::set_entry(A, rr, jj, tmp);
       }
@@ -159,11 +163,11 @@ void qr_decomposition(MatrixType& A, VectorType& tau, IndexVectorType& permutati
     // Reduction by householder matrix
     auto normx = 0.;
     for (size_t rr = jj; rr < num_rows; ++rr)
-      normx += std::pow(M::get_entry(A, rr, jj), 2);
+      normx += std::pow(std::abs(M::get_entry(A, rr, jj)), 2);
     normx = std::sqrt(normx);
 
     if (normx != 0.) {
-      const auto s = -sign(M::get_entry(A, jj, jj));
+      const auto s = -sign(std::real(M::get_entry(A, jj, jj)));
       const auto u1 = M::get_entry(A, jj, jj) - s * normx;
       V::set_entry(w, jj, 1.);
       for (size_t rr = jj + 1; rr < num_rows; ++rr) {
@@ -171,14 +175,14 @@ void qr_decomposition(MatrixType& A, VectorType& tau, IndexVectorType& permutati
         M::set_entry(A, rr, jj, V::get_entry(w, rr));
       }
       M::set_entry(A, jj, jj, s * normx);
-      V::set_entry(tau, jj, -s * u1 / normx);
+      V::set_entry(tau, jj, static_cast<ScalarType>(-s) * u1 / normx);
       // calculate A = H A
       multiply_householder_from_left(A, V::get_entry(tau, jj), w, jj, num_rows, jj + 1, num_cols);
     } // if (normx != 0)
 
     // Norm downdate
     for (size_t cc = jj + 1; cc < num_cols; ++cc)
-      col_norms[cc] -= std::pow(M::get_entry(A, jj, cc), 2);
+      col_norms[cc] -= std::pow(std::abs(M::get_entry(A, jj, cc)), 2);
 
   } // jj
 } // void qr_decomposition(...)
@@ -211,13 +215,13 @@ struct QrHelper
       assert(std::max(M::rows(A), M::cols(A)) < std::numeric_limits<int>::max());
       auto num_rows = static_cast<int>(M::rows(A));
       auto num_cols = static_cast<int>(M::cols(A));
-      auto info = Common::Lapacke::dgeqp3(lapacke_storage_layout(),
-                                          num_rows,
-                                          num_cols,
-                                          M::data(A),
-                                          is_row_major ? num_cols : num_rows,
-                                          VI::data(permutations),
-                                          V::data(tau));
+      auto info = geqp3(lapacke_storage_layout(),
+                        num_rows,
+                        num_cols,
+                        M::data(A),
+                        is_row_major ? num_cols : num_rows,
+                        VI::data(permutations),
+                        V::data(tau));
       if (info)
         DUNE_THROW(Dune::MathError, "QR factorization failed");
       // Lapack indices are 1-based, convert to 0-based
@@ -269,8 +273,9 @@ struct QrHelper
   static void
   apply_q_from_qr(const MatrixType& QR, const VectorType& tau, const SecondVectorType& x, ThirdVectorType& y)
   {
-    typedef Common::VectorAbstraction<SecondVectorType> V2;
-    typedef Common::VectorAbstraction<ThirdVectorType> V3;
+    using V2 = Common::VectorAbstraction<SecondVectorType>;
+    using V3 = Common::VectorAbstraction<ThirdVectorType>;
+    using ScalarType = typename V2::ScalarType;
     for (size_t ii = 0; ii < M::rows(QR); ++ii)
       V3::set_entry(y, ii, V2::get_entry(x, ii));
     if (false) {
@@ -282,25 +287,25 @@ struct QrHelper
       assert(x.size() < std::numeric_limits<int>::max());
       const int num_rows = static_cast<int>(x.size());
       const int num_cols = 1;
-      auto info = Common::Lapacke::dormqr(lapacke_storage_layout(),
-                                          'L',
-                                          transpose == XT::Common::Transpose::yes ? 'T' : 'N',
-                                          num_rows,
-                                          num_cols,
-                                          num_rows,
-                                          M::data(QR),
-                                          num_rows,
-                                          V::data(tau),
-                                          V3::data(y),
-                                          is_row_major ? num_cols : num_rows);
+      auto info = multiply_by_q(lapacke_storage_layout(),
+                                'L',
+                                transpose == XT::Common::Transpose::yes ? 'T' : 'N',
+                                num_rows,
+                                num_cols,
+                                num_rows,
+                                M::data(QR),
+                                num_rows,
+                                V::data(tau),
+                                V3::data(y),
+                                is_row_major ? num_cols : num_rows);
       if (info)
-        DUNE_THROW(Dune::MathError, "Multiplication by Q^T failed");
+        DUNE_THROW(Dune::MathError, "Multiplication by Q or Q^T failed");
 #endif // HAVE_LAPACKE || HAVE_MKL
     } else {
       assert(M::cols(QR) < std::numeric_limits<int>::max());
       const auto num_rows = M::rows(QR);
       const auto num_cols = static_cast<int>(M::cols(QR));
-      VectorType w = V::create(num_rows, 0.);
+      VectorType w = V::create(num_rows, ScalarType(0.));
       if (transpose == XT::Common::Transpose::no)
         for (int jj = num_cols - 1; jj >= 0; --jj) {
           set_w_vector(QR, jj, w);
@@ -330,7 +335,7 @@ private:
     const size_t num_rows = M::rows(QR);
     const size_t num_cols = M::cols(QR);
     auto ret = eye_matrix<typename M::template MatrixTypeTemplate<M::static_rows, M::static_rows>>(
-        num_rows, Common::dense_pattern(num_rows, num_rows));
+        num_rows, dense_pattern(num_rows, num_rows));
     VectorType w = V::create(num_rows, 1.);
     for (int jj = int(num_cols) - 1; jj >= 0; --jj) {
       set_w_vector(QR, jj, w);
@@ -338,6 +343,54 @@ private:
     }
     return ret;
   }
+
+#if HAVE_LAPACKE || HAVE_MKL
+  template <class ScalarType>
+  static std::enable_if_t<Common::is_arithmetic<ScalarType>::value, int>
+  geqp3(int matrix_layout, int m, int n, ScalarType* a, int lda, int* jpvt, ScalarType* tau)
+  {
+    return Common::Lapacke::dgeqp3(matrix_layout, m, n, a, lda, jpvt, tau);
+  }
+
+  template <class ScalarType>
+  static std::enable_if_t<Common::is_complex<ScalarType>::value, int>
+  geqp3(int matrix_layout, int m, int n, ScalarType* a, int lda, int* jpvt, ScalarType* tau)
+  {
+    return Common::Lapacke::zgeqp3(matrix_layout, m, n, a, lda, jpvt, tau);
+  }
+
+  template <class ScalarType>
+  static std::enable_if_t<Common::is_arithmetic<ScalarType>::value, int> multiply_by_q(int matrix_layout,
+                                                                                       char side,
+                                                                                       char trans,
+                                                                                       int m,
+                                                                                       int n,
+                                                                                       int k,
+                                                                                       const ScalarType* a,
+                                                                                       int lda,
+                                                                                       const ScalarType* tau,
+                                                                                       ScalarType* c,
+                                                                                       int ldc)
+  {
+    return Common::Lapacke::dormqr(matrix_layout, side, trans, m, n, k, a, lda, tau, c, ldc);
+  }
+
+  template <class ScalarType>
+  static std::enable_if_t<Common::is_complex<ScalarType>::value, int> multiply_by_q(int matrix_layout,
+                                                                                    char side,
+                                                                                    char trans,
+                                                                                    int m,
+                                                                                    int n,
+                                                                                    int k,
+                                                                                    const ScalarType* a,
+                                                                                    int lda,
+                                                                                    const ScalarType* tau,
+                                                                                    ScalarType* c,
+                                                                                    int ldc)
+  {
+    return Common::Lapacke::zunmqr(matrix_layout, side, trans == 'T' ? 'C' : trans, m, n, k, a, lda, tau, c, ldc);
+  }
+#endif // HAVE_LAPACKE || HAVE_MKL
 }; // struct QrHelper
 
 
@@ -396,10 +449,9 @@ void solve_qr_factorized(const MatrixType& QR,
                          const RhsVectorType& b,
                          SecondVectorType* work = nullptr)
 {
-
-  typedef Common::MatrixAbstraction<MatrixType> M;
-  typedef Common::VectorAbstraction<VectorType> V;
-  typedef Common::VectorAbstraction<IndexVectorType> VI;
+  using M = Common::MatrixAbstraction<MatrixType>;
+  using V2 = Common::VectorAbstraction<SecondVectorType>;
+  using VI = Common::VectorAbstraction<IndexVectorType>;
 
   auto num_rows = M::rows(QR);
   auto num_cols = M::cols(QR);
@@ -421,7 +473,7 @@ void solve_qr_factorized(const MatrixType& QR,
     // Currently, the implementation of solve_upper_triangular for sparse matrices relies on a triangular pattern, so
     // copy R from QR to its own matrix first
     MatrixType R = eye_matrix<MatrixType>(
-        num_rows, num_cols, Common::triangular_pattern(num_rows, num_cols, Common::MatrixPattern::upper_triangular));
+        num_rows, num_cols, triangular_pattern(num_rows, num_cols, Common::MatrixPattern::upper_triangular));
     for (size_t ii = 0; ii < num_rows; ++ii)
       for (size_t jj = ii; jj < num_cols; ++jj)
         M::set_entry(R, ii, jj, M::get_entry(QR, ii, jj));
@@ -432,9 +484,26 @@ void solve_qr_factorized(const MatrixType& QR,
 
   // Undo permutations
   for (int ii = 0; ii < int(M::rows(QR)); ++ii)
-    V::set_entry(x, VI::get_entry(permutations, ii), V::get_entry(*work, ii));
+    V2::set_entry(x, VI::get_entry(permutations, ii), V2::get_entry(*work, ii));
 }
 
+/**
+ *  \brief Performs a QR decomposition to solve Ax = b
+ *  \see qr
+ */
+template <class MatrixType, class VectorType, class SecondVectorType>
+typename std::enable_if_t<Common::is_matrix<MatrixType>::value && Common::is_vector<VectorType>::value
+                              && Common::is_vector<SecondVectorType>::value,
+                          void>
+solve_by_qr_decomposition(MatrixType& A, VectorType& x, const SecondVectorType& b)
+{
+  using M = typename Common::MatrixAbstraction<MatrixType>;
+  std::vector<typename M::ScalarType> tau(M::cols(A));
+  std::vector<int> permutations(M::cols(A));
+  qr(A, tau, permutations);
+  solve_qr_factorized(A, tau, permutations, x, b);
+} // void solve_lower_triangular(...)
+
 
 } // namespace LA
 } // namespace XT
diff --git a/dune/xt/la/algorithms/triangular_solves.hh b/dune/xt/la/algorithms/triangular_solves.hh
index 90c0859cf..c4f4a1b33 100644
--- a/dune/xt/la/algorithms/triangular_solves.hh
+++ b/dune/xt/la/algorithms/triangular_solves.hh
@@ -194,8 +194,9 @@ template <class MatrixType,
           Common::StorageLayout storage_layout = Common::MatrixAbstraction<MatrixType>::storage_layout>
 struct TriangularSolver
 {
-  typedef Common::MatrixAbstraction<MatrixType> M;
-  typedef Common::VectorAbstraction<VectorType> V;
+  using M = typename Common::MatrixAbstraction<MatrixType>;
+  using V = typename Common::VectorAbstraction<VectorType>;
+  using ScalarType = typename M::ScalarType;
 
   static void solve(const MatrixType& A, VectorType& x)
   {
@@ -227,18 +228,18 @@ struct TriangularSolver
       const int blas_triangular =
           (triangular_type == Common::MatrixPattern::upper_triangular) ? Common::Blas::upper() : Common::Blas::lower();
       const int blas_trans = (transpose == Common::Transpose::yes) ? Common::Blas::trans() : Common::Blas::no_trans();
-      Common::Blas::dtrsm(blas_storage_layout,
-                          Common::Blas::left(),
-                          blas_triangular,
-                          blas_trans,
-                          Common::Blas::non_unit(),
-                          num_rows,
-                          1,
-                          1.,
-                          M::data(A),
-                          num_rows,
-                          rhs,
-                          storage_layout == Common::StorageLayout::dense_row_major ? 1 : num_rows);
+      trsm(blas_storage_layout,
+           Common::Blas::left(),
+           blas_triangular,
+           blas_trans,
+           Common::Blas::non_unit(),
+           num_rows,
+           1,
+           ScalarType(1.),
+           M::data(A),
+           num_rows,
+           rhs,
+           storage_layout == Common::StorageLayout::dense_row_major ? 1 : num_rows);
 #endif // HAVE_MKL || HAVE_LAPACKE
     } else if (storage_layout == Common::StorageLayout::csr) {
       if (lower && !trans) {
@@ -275,6 +276,54 @@ struct TriangularSolver
         backward_solve<MatrixType, decltype(xp), trans>(A, xp, rhs);
     }
   } // static void solve(...)
+
+#if HAVE_LAPACKE || HAVE_MKL
+private:
+  template <class ScalarType>
+  static std::enable_if_t<Common::is_arithmetic<ScalarType>::value, void> trsm(const int layout,
+                                                                               const int side,
+                                                                               const int uplo,
+                                                                               const int transa,
+                                                                               const int diag,
+                                                                               const int m,
+                                                                               const int n,
+                                                                               const ScalarType alpha,
+                                                                               const ScalarType* a,
+                                                                               const int lda,
+                                                                               ScalarType* b,
+                                                                               const int ldb)
+  {
+    return Common::Blas::dtrsm(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb);
+  }
+
+  template <class ScalarType>
+  static std::enable_if_t<Common::is_complex<ScalarType>::value, void> trsm(const int layout,
+                                                                            const int side,
+                                                                            const int uplo,
+                                                                            const int transa,
+                                                                            const int diag,
+                                                                            const int m,
+                                                                            const int n,
+                                                                            const ScalarType alpha,
+                                                                            const ScalarType* a,
+                                                                            const int lda,
+                                                                            ScalarType* b,
+                                                                            const int ldb)
+  {
+    return Common::Blas::ztrsm(layout,
+                               side,
+                               uplo,
+                               transa,
+                               diag,
+                               m,
+                               n,
+                               static_cast<const void*>(&alpha),
+                               static_cast<const void*>(a),
+                               lda,
+                               static_cast<void*>(b),
+                               ldb);
+  }
+#endif // HAVE_LAPACKE || HAVE_MKL
 };
 
 // specialization for CommonSparseOrDenseMatrix
diff --git a/dune/xt/la/container/common/matrix/dense.hh b/dune/xt/la/container/common/matrix/dense.hh
index fcd5ec1ac..d1b4206c5 100644
--- a/dune/xt/la/container/common/matrix/dense.hh
+++ b/dune/xt/la/container/common/matrix/dense.hh
@@ -13,26 +13,20 @@
 #define DUNE_XT_LA_CONTAINER_COMMON_MATRIX_DENSE_HH
 
 #include <cmath>
-#include <initializer_list>
 #include <memory>
-#include <type_traits>
-#include <vector>
-#include <complex>
 #include <mutex>
+#include <vector>
 
-#include <dune/common/dynmatrix.hh>
-#include <dune/common/densematrix.hh>
-#include <dune/common/float_cmp.hh>
 #include <dune/common/ftraits.hh>
 #include <dune/common/unused.hh>
 
 #include <dune/xt/common/exceptions.hh>
+#include <dune/xt/common/float_cmp.hh>
+#include <dune/xt/common/vector.hh>
 
-#include <dune/xt/la/container/interfaces.hh>
+#include <dune/xt/la/container/matrix-interface.hh>
 #include <dune/xt/la/container/pattern.hh>
 
-#include "../vector.hh"
-
 namespace Dune {
 namespace XT {
 namespace LA {
@@ -41,18 +35,47 @@ namespace LA {
 template <class ScalarImp>
 class CommonDenseMatrix;
 
+template <class ScalarImp>
+class CommonSparseVector;
+
 
 namespace internal {
 
 
+template <class ScalarType>
+struct RowMajorMatrixBackend
+{
+  RowMajorMatrixBackend(size_t num_rows, size_t num_cols, const ScalarType value = ScalarType(0))
+    : num_rows_(num_rows)
+    , num_cols_(num_cols)
+    , entries_(num_rows_ * num_cols_, value)
+  {
+  }
+
+  ScalarType& get_entry_ref(size_t rr, size_t cc)
+  {
+    return entries_[rr * num_cols_ + cc];
+  }
+
+  const ScalarType& get_entry_ref(size_t rr, size_t cc) const
+  {
+    return entries_[rr * num_cols_ + cc];
+  }
+
+  size_t num_rows_;
+  size_t num_cols_;
+  std::vector<ScalarType> entries_;
+};
+
+
 template <class ScalarImp>
 class CommonDenseMatrixTraits
 {
 public:
-  typedef typename Dune::FieldTraits<ScalarImp>::field_type ScalarType;
-  typedef typename Dune::FieldTraits<ScalarImp>::real_type RealType;
-  typedef CommonDenseMatrix<ScalarType> derived_type;
-  typedef Dune::DynamicMatrix<ScalarType> BackendType;
+  using ScalarType = typename Dune::FieldTraits<ScalarImp>::field_type;
+  using RealType = typename Dune::FieldTraits<ScalarImp>::real_type;
+  using BackendType = RowMajorMatrixBackend<ScalarType>;
+  using derived_type = CommonDenseMatrix<ScalarType>;
   static const Backends backend_type = Backends::common_dense;
   static const Backends vector_type = Backends::common_dense;
   static const constexpr bool sparse = false;
@@ -63,26 +86,26 @@ public:
 
 
 /**
- *  \brief  A dense matrix implementation of MatrixInterface using the Dune::DynamicMatrix.
+ *  \brief  A dense matrix implementation of MatrixInterface using the a std::vector backend.
  */
 template <class ScalarImp = double>
 class CommonDenseMatrix : public MatrixInterface<internal::CommonDenseMatrixTraits<ScalarImp>, ScalarImp>,
                           public ProvidesBackend<internal::CommonDenseMatrixTraits<ScalarImp>>
 {
-  typedef CommonDenseMatrix<ScalarImp> ThisType;
-  typedef MatrixInterface<internal::CommonDenseMatrixTraits<ScalarImp>, ScalarImp> MatrixInterfaceType;
+  using ThisType = CommonDenseMatrix;
+  using MatrixInterfaceType = MatrixInterface<internal::CommonDenseMatrixTraits<ScalarImp>, ScalarImp>;
 
 public:
-  typedef internal::CommonDenseMatrixTraits<ScalarImp> Traits;
-  typedef typename Traits::BackendType BackendType;
-  typedef typename Traits::ScalarType ScalarType;
-  typedef typename Traits::RealType RealType;
+  using Traits = typename internal::CommonDenseMatrixTraits<ScalarImp>;
+  using BackendType = typename Traits::BackendType;
+  using ScalarType = typename Traits::ScalarType;
+  using RealType = typename Traits::RealType;
 
   explicit CommonDenseMatrix(const size_t rr = 0,
                              const size_t cc = 0,
                              const ScalarType value = ScalarType(0),
                              const size_t num_mutexes = 1)
-    : backend_(new BackendType(rr, cc, value))
+    : backend_(std::make_shared<BackendType>(rr, cc, value))
     , mutexes_(num_mutexes > 0 ? std::make_shared<std::vector<std::mutex>>(num_mutexes) : nullptr)
     , unshareable_(false)
   {
@@ -93,7 +116,7 @@ public:
                     const size_t cc,
                     const SparsityPatternDefault& /*pattern*/,
                     const size_t num_mutexes = 1)
-    : backend_(new BackendType(rr, cc, ScalarType(0)))
+    : backend_(std::make_shared<BackendType>(rr, cc, ScalarType(0)))
     , mutexes_(num_mutexes > 0 ? std::make_shared<std::vector<std::mutex>>(num_mutexes) : nullptr)
     , unshareable_(false)
   {
@@ -125,7 +148,7 @@ public:
 
   template <class T>
   CommonDenseMatrix(const DenseMatrix<T>& other, const size_t num_mutexes = 1)
-    : backend_(new BackendType(other.rows(), other.cols()))
+    : backend_(std::make_shared<BackendType>(other.rows(), other.cols()))
     , mutexes_(num_mutexes > 0 ? std::make_shared<std::vector<std::mutex>>(num_mutexes) : nullptr)
     , unshareable_(false)
   {
@@ -193,12 +216,12 @@ public:
 
   ScalarType* data()
   {
-    return &(backend()[0][0]);
+    return backend().entries_.data();
   }
 
   const ScalarType* data() const
   {
-    return &(backend()[0][0]);
+    return backend().entries_.data();
   }
 
   /// \}
@@ -214,7 +237,8 @@ public:
   {
     ensure_uniqueness();
     const internal::VectorLockGuard DUNE_UNUSED(guard)(mutexes_);
-    *backend_ *= alpha;
+    for (auto& entry : backend_->entries_)
+      entry *= alpha;
   }
 
   void axpy(const ScalarType& alpha, const ThisType& xx)
@@ -228,7 +252,8 @@ public:
                                      << "x"
                                      << cols()
                                      << ")!");
-    backend_->axpy(alpha, xx.backend());
+    for (size_t ii = 0; ii < backend_->entries_.size(); ++ii)
+      backend_->entries_[ii] += alpha * xx.backend_->entries_[ii];
   } // ... axpy(...)
 
   bool has_equal_shape(const ThisType& other) const
@@ -242,36 +267,42 @@ public:
 
   inline size_t rows() const
   {
-    return backend_->rows();
+    return backend_->num_rows_;
   }
 
   inline size_t cols() const
   {
-    return backend_->cols();
-  }
-
-  template <class FirstTraits, class SecondTraits>
-  inline void mv(const VectorInterface<FirstTraits, ScalarType>& xx,
-                 VectorInterface<SecondTraits, ScalarType>& yy) const
-  {
-    mv_helper<FirstTraits, SecondTraits>::mv(xx, yy, this);
-  }
-
-  inline void mv(const CommonDenseVector<ScalarType>& xx, CommonDenseVector<ScalarType>& yy) const
-  {
-    backend_->mv(xx.backend(), yy.backend());
+    return backend_->num_cols_;
   }
 
-  template <class FirstVectorImp, class SecondVectorImp>
-  inline void mv(const Dune::DenseVector<FirstVectorImp>& xx, Dune::DenseVector<SecondVectorImp>& yy) const
+  template <class FirstVectorType, class SecondVectorType>
+  inline void mv(const FirstVectorType& xx, SecondVectorType& yy) const
   {
-    backend_->mv(xx, yy);
+    using V1 = typename Common::VectorAbstraction<FirstVectorType>;
+    using V2 = typename Common::VectorAbstraction<SecondVectorType>;
+    static_assert(V1::is_vector && V2::is_vector, "");
+    assert(xx.size() == cols() && yy.size() == rows());
+    yy *= ScalarType(0.);
+    for (size_t rr = 0; rr < rows(); ++rr) {
+      V2::set_entry(yy, rr, 0.);
+      for (size_t cc = 0; cc < cols(); ++cc)
+        V2::add_to_entry(yy, rr, get_entry(rr, cc) * V1::get_entry(xx, cc));
+    }
   }
 
-  template <class FirstVectorImp, class SecondVectorImp>
-  inline void mtv(const Dune::DenseVector<FirstVectorImp>& xx, Dune::DenseVector<SecondVectorImp>& yy) const
-  {
-    backend_->mtv(xx, yy);
+  template <class FirstVectorType, class SecondVectorType>
+  inline void mtv(const FirstVectorType& xx, SecondVectorType& yy) const
+  {
+    using V1 = typename Common::VectorAbstraction<FirstVectorType>;
+    using V2 = typename Common::VectorAbstraction<SecondVectorType>;
+    static_assert(V1::is_vector && V2::is_vector, "");
+    assert(V1::size(xx) == rows() && V1::size(yy) == cols());
+    yy *= ScalarType(0.);
+    for (size_t cc = 0; cc < cols(); ++cc) {
+      V2::set_entry(yy, cc, 0.);
+      for (size_t rr = 0; rr < rows(); ++rr)
+        V2::add_to_entry(yy, cc, get_entry(cc, rr) * V1::get_entry(xx, rr));
+    }
   }
 
   void mtv(const CommonSparseVector<ScalarType>& xx, CommonSparseVector<ScalarType>& yy) const
@@ -279,12 +310,13 @@ public:
     yy.clear();
     const auto& vec_entries = xx.entries();
     const auto& vec_indices = xx.indices();
-    thread_local Dune::DynamicVector<ScalarType> tmp_vec;
+    thread_local std::vector<ScalarType> tmp_vec;
     tmp_vec.resize(cols(), 0.);
     std::fill(tmp_vec.begin(), tmp_vec.end(), 0.);
     for (size_t ii = 0; ii < vec_entries.size(); ++ii) {
-      const size_t cc = vec_indices[ii];
-      tmp_vec.axpy(vec_entries[ii], (*backend_)[cc]);
+      const size_t rr = vec_indices[ii];
+      for (size_t cc = 0; cc < cols(); ++cc)
+        tmp_vec[cc] += vec_entries[ii] * backend_->get_entry_ref(rr, cc);
     }
     for (size_t cc = 0; cc < cols(); ++cc)
       if (XT::Common::FloatCmp::ne(tmp_vec[cc], 0.))
@@ -297,7 +329,7 @@ public:
     internal::LockGuard DUNE_UNUSED(lock)(mutexes_, ii);
     assert(ii < rows());
     assert(jj < cols());
-    (*backend_)[ii][jj] += value;
+    backend_->get_entry_ref(ii, jj) += value;
   } // ... add_to_entry(...)
 
   void set_entry(const size_t ii, const size_t jj, const ScalarType& value)
@@ -305,14 +337,14 @@ public:
     ensure_uniqueness();
     assert(ii < rows());
     assert(jj < cols());
-    (*backend_)[ii][jj] = value;
+    backend_->get_entry_ref(ii, jj) = value;
   } // ... set_entry(...)
 
   ScalarType get_entry(const size_t ii, const size_t jj) const
   {
     assert(ii < rows());
     assert(jj < cols());
-    return (*backend_)[ii][jj];
+    return backend_->get_entry_ref(ii, jj);
   } // ... get_entry(...)
 
   void clear_row(const size_t ii)
@@ -321,7 +353,7 @@ public:
     if (ii >= rows())
       DUNE_THROW(Common::Exceptions::index_out_of_range,
                  "Given ii (" << ii << ") is larger than the rows of this (" << rows() << ")!");
-    std::fill((*backend_)[ii].begin(), (*backend_)[ii].end(), ScalarType(0));
+    std::fill_n(&(backend_->get_entry_ref(ii, 0)), cols(), ScalarType(0));
   } // ... clear_row(...)
 
   void clear_col(const size_t jj)
@@ -331,7 +363,7 @@ public:
       DUNE_THROW(Common::Exceptions::index_out_of_range,
                  "Given jj (" << jj << ") is larger than the cols of this (" << cols() << ")!");
     for (size_t ii = 0; ii < rows(); ++ii)
-      (*backend_)[ii][jj] = ScalarType(0);
+      backend_->get_entry_ref(ii, jj) = ScalarType(0);
   } // ... clear_col(...)
 
   void unit_row(const size_t ii)
@@ -343,10 +375,8 @@ public:
     if (ii >= rows())
       DUNE_THROW(Common::Exceptions::index_out_of_range,
                  "Given ii (" << ii << ") is larger than the rows of this (" << rows() << ")!");
-    auto& row = (*backend_)[ii];
-    for (size_t jj = 0; jj < cols(); ++jj)
-      row[jj] = ScalarType(0);
-    row[ii] = ScalarType(1);
+    clear_row(ii);
+    set_entry(ii, ii, ScalarType(1));
   } // ... unit_row(...)
 
   void unit_col(const size_t jj)
@@ -358,21 +388,15 @@ public:
     if (jj >= rows())
       DUNE_THROW(Common::Exceptions::index_out_of_range,
                  "Given jj (" << jj << ") is larger than the rows of this (" << rows() << ")!");
-    for (size_t ii = 0; ii < rows(); ++ii)
-      (*backend_)[ii][jj] = ScalarType(0);
-    (*backend_)[jj][jj] = ScalarType(1);
+    clear_col(jj);
+    set_entry(jj, jj, ScalarType(1));
   } // ... unit_col(...)
 
   bool valid() const
   {
-    for (size_t ii = 0; ii < rows(); ++ii) {
-      const auto& row_vec = (*backend_)[ii];
-      for (size_t jj = 0; jj < cols(); ++jj) {
-        const auto& entry = row_vec[jj];
-        if (Common::isnan(entry) || Common::isinf(entry))
-          return false;
-      }
-    }
+    for (const auto& entry : backend_->entries_)
+      if (Common::isnan(entry) || Common::isinf(entry))
+        return false;
     return true;
   } // ... valid(...)
 
@@ -389,33 +413,20 @@ public:
     *backend_ = *other.backend_;
   }
 
-  template <class OtherMatrixTraits>
-  void rightmultiply(const MatrixInterface<OtherMatrixTraits, ScalarType>& other)
+  template <class OtherMatrixType>
+  void rightmultiply(const OtherMatrixType& other)
   {
+    using M = typename Common::MatrixAbstraction<OtherMatrixType>;
+    static_assert(M::is_matrix, "");
     ensure_uniqueness();
-    BackendType new_backend(rows(), other.cols(), ScalarType(0.));
-    if (other.rows() != cols())
+    BackendType new_backend(rows(), M::cols(other), ScalarType(0.));
+    if (M::rows(other) != cols())
       DUNE_THROW(Dune::XT::Common::Exceptions::shapes_do_not_match,
                  "For rightmultiply, the number of columns of this has to match the number of rows of other!");
     for (size_t rr = 0; rr < rows(); ++rr)
       for (size_t cc = 0; cc < cols(); ++cc)
         for (size_t kk = 0; kk < cols(); ++kk)
-          new_backend[rr][cc] += get_entry(rr, kk) * other.get_entry(kk, cc);
-    *backend_ = new_backend;
-  }
-
-  template <class OtherMatrixImp>
-  void rightmultiply(const Dune::DenseMatrix<OtherMatrixImp>& other)
-  {
-    ensure_uniqueness();
-    BackendType new_backend(rows(), other.cols(), ScalarType(0.));
-    if (other.rows() != cols())
-      DUNE_THROW(Dune::XT::Common::Exceptions::shapes_do_not_match,
-                 "For rightmultiply, the number of columns of this has to match the number of rows of other!");
-    for (size_t rr = 0; rr < rows(); ++rr)
-      for (size_t cc = 0; cc < cols(); ++cc)
-        for (size_t kk = 0; kk < cols(); ++kk)
-          new_backend[rr][cc] += get_entry(rr, kk) * other.get_entry[kk][cc];
+          new_backend->get_entry_ref(rr, cc) += get_entry(rr, kk) * M::get_entry(other, kk, cc);
     *backend_ = new_backend;
   }
 
@@ -423,10 +434,9 @@ public:
                               Common::FloatCmp::DefaultEpsilon<ScalarType>::value()) const override
   {
     auto ret = this->copy();
-    for (size_t ii = 0; ii < rows(); ++ii)
-      for (size_t jj = 0; jj < cols(); ++jj)
-        if (XT::Common::FloatCmp::eq<Common::FloatCmp::Style::absolute>(ScalarType(0.), ret.get_entry(ii, jj), 0., eps))
-          ret.set_entry(ii, jj, 0.);
+    for (auto& entry : ret.backend_->entries_)
+      if (XT::Common::FloatCmp::eq<Common::FloatCmp::Style::absolute>(ScalarType(0.), entry, 0., eps))
+        entry = ScalarType(0);
     return ret;
   } // ... pruned(...)
 
@@ -447,33 +457,6 @@ protected:
   } // ... ensure_uniqueness(...)
 
 private:
-  template <class FirstTraits, class SecondTraits, class anything = void>
-  struct mv_helper
-  {
-    static void mv(const VectorInterface<FirstTraits, ScalarType>& xx,
-                   VectorInterface<SecondTraits, ScalarType>& yy,
-                   const ThisType* this_ptr)
-    {
-      yy *= ScalarType(0.);
-      for (size_t rr = 0; rr < this_ptr->rows(); ++rr)
-        for (size_t cc = 0; cc < this_ptr->cols(); ++cc)
-          yy.add_to_entry(rr, this_ptr->get_entry(rr, cc) * xx.get_entry(cc));
-    }
-  };
-
-  template <class anything>
-  struct mv_helper<internal::CommonDenseVectorTraits<ScalarType>,
-                   internal::CommonDenseVectorTraits<ScalarType>,
-                   anything>
-  {
-    static void mv(const VectorInterface<internal::CommonDenseVectorTraits<ScalarType>, ScalarType>& xx,
-                   VectorInterface<internal::CommonDenseVectorTraits<ScalarType>, ScalarType>& yy,
-                   const ThisType* this_ptr)
-    {
-      this_ptr->mv(xx.as_imp(), yy.as_imp());
-    }
-  };
-
   mutable std::shared_ptr<BackendType> backend_;
   mutable std::shared_ptr<std::vector<std::mutex>> mutexes_;
   mutable bool unshareable_;
@@ -488,7 +471,7 @@ template <class T>
 struct MatrixAbstraction<LA::CommonDenseMatrix<T>>
     : public LA::internal::MatrixAbstractionBase<LA::CommonDenseMatrix<T>>
 {
-  static const constexpr Common::StorageLayout storage_layout = Common::StorageLayout::other;
+  static const constexpr Common::StorageLayout storage_layout = Common::StorageLayout::dense_row_major;
 
   static inline T* data(LA::CommonDenseMatrix<T>& mat)
   {
diff --git a/dune/xt/la/solver/common.hh b/dune/xt/la/solver/common.hh
index d8719e8bb..e6b31d297 100644
--- a/dune/xt/la/solver/common.hh
+++ b/dune/xt/la/solver/common.hh
@@ -21,6 +21,8 @@
 
 #include <dune/xt/common/configuration.hh>
 
+#include <dune/xt/la/algorithms/qr.hh>
+
 #include <dune/xt/la/container/common.hh>
 
 #include "../solver.hh"
@@ -80,7 +82,8 @@ public:
     const Common::Configuration default_opts = options(type);
     // solve
     try {
-      matrix_.backend().solve(solution.backend(), rhs.backend());
+      auto QR = matrix_;
+      solve_by_qr_decomposition(QR, solution, rhs);
     } catch (FMatrixError&) {
       DUNE_THROW(Exceptions::linear_solver_failed_bc_data_did_not_fulfill_requirements,
                  "The dune-common backend reported 'FMatrixError'!\n"
-- 
GitLab